author     Dimitry Andric <dim@FreeBSD.org>  2012-04-14 13:54:10 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2012-04-14 13:54:10 +0000
commit     63faed5b8e4f2755f127fcb8aa440480c0649327 (patch)
tree       19c69a04768629f2d440944b71cbe90adae0b615 /lib
parent     d4c8b5d2e851b0e8a063c6bf8543a4823a26c15a (diff)
download   src-63faed5b8e4f2755f127fcb8aa440480c0649327.tar.gz
           src-63faed5b8e4f2755f127fcb8aa440480c0649327.zip

Vendor import of llvm trunk r154661 (vendor/llvm/llvm-trunk-r154661)

Notes:
    svn path=/vendor/llvm/dist/; revision=234285
    svn path=/vendor/llvm/llvm-trunk-r154661/; revision=234286; tag=vendor/llvm/llvm-trunk-r154661
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasAnalysis.cpp16
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp6
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp4
-rw-r--r--lib/Analysis/AliasSetTracker.cpp4
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp59
-rw-r--r--lib/Analysis/BlockFrequencyInfo.cpp2
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp401
-rw-r--r--lib/Analysis/CFGPrinter.cpp8
-rw-r--r--lib/Analysis/CMakeLists.txt7
-rw-r--r--lib/Analysis/CaptureTracking.cpp95
-rw-r--r--lib/Analysis/CodeMetrics.cpp184
-rw-r--r--lib/Analysis/ConstantFolding.cpp373
-rw-r--r--lib/Analysis/DIBuilder.cpp112
-rw-r--r--lib/Analysis/DebugInfo.cpp110
-rw-r--r--lib/Analysis/DominanceFrontier.cpp2
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt6
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp13
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp6
-rw-r--r--lib/Analysis/IPA/LLVMBuild.txt23
-rw-r--r--lib/Analysis/IVUsers.cpp67
-rw-r--r--lib/Analysis/InlineCost.cpp1439
-rw-r--r--lib/Analysis/InstructionSimplify.cpp1167
-rw-r--r--lib/Analysis/LLVMBuild.txt25
-rw-r--r--lib/Analysis/LazyValueInfo.cpp123
-rw-r--r--lib/Analysis/Lint.cpp19
-rw-r--r--lib/Analysis/Loads.cpp16
-rw-r--r--lib/Analysis/LoopDependenceAnalysis.cpp2
-rw-r--r--lib/Analysis/LoopInfo.cpp127
-rw-r--r--lib/Analysis/LoopPass.cpp23
-rw-r--r--lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp8
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp94
-rw-r--r--lib/Analysis/PHITransAddr.cpp13
-rw-r--r--lib/Analysis/PathNumbering.cpp4
-rw-r--r--lib/Analysis/PathProfileVerifier.cpp16
-rw-r--r--lib/Analysis/ProfileEstimatorPass.cpp2
-rw-r--r--lib/Analysis/ProfileInfoLoaderPass.cpp4
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp18
-rw-r--r--lib/Analysis/RegionInfo.cpp8
-rw-r--r--lib/Analysis/ScalarEvolution.cpp466
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp497
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp2
-rw-r--r--lib/Analysis/SparsePropagation.cpp8
-rw-r--r--lib/Analysis/Trace.cpp2
-rw-r--r--lib/Analysis/ValueTracking.cpp989
-rw-r--r--lib/Archive/ArchiveReader.cpp33
-rw-r--r--lib/Archive/ArchiveWriter.cpp6
-rw-r--r--lib/Archive/CMakeLists.txt6
-rw-r--r--lib/Archive/LLVMBuild.txt22
-rw-r--r--lib/AsmParser/CMakeLists.txt5
-rw-r--r--lib/AsmParser/LLLexer.cpp62
-rw-r--r--lib/AsmParser/LLLexer.h1
-rw-r--r--lib/AsmParser/LLParser.cpp127
-rw-r--r--lib/AsmParser/LLParser.h15
-rw-r--r--lib/AsmParser/LLToken.h4
-rw-r--r--lib/AsmParser/LLVMBuild.txt22
-rw-r--r--lib/AsmParser/Parser.cpp2
-rw-r--r--lib/Bitcode/LLVMBuild.txt24
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp680
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h40
-rw-r--r--lib/Bitcode/Reader/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Reader/LLVMBuild.txt22
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp318
-rw-r--r--lib/Bitcode/Writer/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Writer/LLVMBuild.txt22
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp44
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h4
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp29
-rw-r--r--lib/CodeGen/AllocationOrder.cpp2
-rw-r--r--lib/CodeGen/AllocationOrder.h3
-rw-r--r--lib/CodeGen/Analysis.cpp73
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp19
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp302
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp45
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt11
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp42
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h87
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp287
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h290
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp299
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h68
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp427
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h56
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp46
-rw-r--r--lib/CodeGen/AsmPrinter/LLVMBuild.txt22
-rw-r--r--lib/CodeGen/BranchFolding.cpp129
-rw-r--r--lib/CodeGen/CMakeLists.txt27
-rw-r--r--lib/CodeGen/CallingConvLower.cpp2
-rw-r--r--lib/CodeGen/CodeGen.cpp17
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp9
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp72
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h3
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp223
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp31
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp703
-rw-r--r--lib/CodeGen/ELF.h227
-rw-r--r--lib/CodeGen/ELFCodeEmitter.cpp205
-rw-r--r--lib/CodeGen/ELFCodeEmitter.h78
-rw-r--r--lib/CodeGen/ELFWriter.cpp1105
-rw-r--r--lib/CodeGen/ELFWriter.h251
-rw-r--r--lib/CodeGen/EdgeBundles.cpp2
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp526
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp14
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp12
-rw-r--r--lib/CodeGen/GCMetadata.cpp6
-rw-r--r--lib/CodeGen/GCStrategy.cpp160
-rw-r--r--lib/CodeGen/IfConversion.cpp132
-rw-r--r--lib/CodeGen/InlineSpiller.cpp191
-rw-r--r--lib/CodeGen/InterferenceCache.cpp35
-rw-r--r--lib/CodeGen/InterferenceCache.h15
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp5
-rw-r--r--lib/CodeGen/JITCodeEmitter.cpp14
-rw-r--r--lib/CodeGen/LLVMBuild.txt25
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp414
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp6
-rw-r--r--lib/CodeGen/LexicalScopes.cpp2
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp23
-rw-r--r--lib/CodeGen/LiveInterval.cpp36
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2012
-rw-r--r--lib/CodeGen/LiveIntervalUnion.cpp2
-rw-r--r--lib/CodeGen/LiveIntervalUnion.h2
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp4
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp110
-rw-r--r--lib/CodeGen/LiveRangeEdit.h206
-rw-r--r--lib/CodeGen/LiveVariables.cpp114
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp10
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp219
-rw-r--r--lib/CodeGen/MachineBlockFrequencyInfo.cpp2
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp1001
-rw-r--r--lib/CodeGen/MachineBranchProbabilityInfo.cpp65
-rw-r--r--lib/CodeGen/MachineCSE.cpp108
-rw-r--r--lib/CodeGen/MachineCodeEmitter.cpp14
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp340
-rw-r--r--lib/CodeGen/MachineFunction.cpp93
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp5
-rw-r--r--lib/CodeGen/MachineInstr.cpp254
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp278
-rw-r--r--lib/CodeGen/MachineLICM.cpp608
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp18
-rw-r--r--lib/CodeGen/MachinePassRegistry.cpp1
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp65
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp7
-rw-r--r--lib/CodeGen/MachineScheduler.cpp614
-rw-r--r--lib/CodeGen/MachineSink.cpp182
-rw-r--r--lib/CodeGen/MachineVerifier.cpp462
-rw-r--r--lib/CodeGen/ObjectCodeEmitter.cpp141
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp9
-rw-r--r--lib/CodeGen/PHIElimination.cpp13
-rw-r--r--lib/CodeGen/Passes.cpp607
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp35
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp256
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp36
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp41
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h4
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp2
-rw-r--r--lib/CodeGen/RegAllocBase.cpp280
-rw-r--r--lib/CodeGen/RegAllocBase.h36
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp316
-rw-r--r--lib/CodeGen/RegAllocFast.cpp321
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp110
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp1543
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp173
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp16
-rw-r--r--lib/CodeGen/RegisterClassInfo.h2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp252
-rw-r--r--lib/CodeGen/RegisterCoalescer.h6
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp67
-rw-r--r--lib/CodeGen/RenderMachineFunction.cpp19
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp62
-rw-r--r--lib/CodeGen/ScheduleDAGEmit.cpp68
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp654
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h212
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp24
-rw-r--r--lib/CodeGen/ScoreboardHazardRecognizer.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt14
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1120
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp215
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp117
-rw-r--r--lib/CodeGen/SelectionDAG/LLVMBuild.txt22
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp1251
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp134
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp140
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp163
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp657
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp650
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp130
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h40
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp (renamed from lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp)85
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp1050
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp629
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h18
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp631
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp215
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp3
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp7
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp775
-rw-r--r--lib/CodeGen/SlotIndexes.cpp12
-rw-r--r--lib/CodeGen/Spiller.cpp81
-rw-r--r--lib/CodeGen/Spiller.h1
-rw-r--r--lib/CodeGen/SplitKit.cpp85
-rw-r--r--lib/CodeGen/SplitKit.h15
-rw-r--r--lib/CodeGen/Splitter.cpp827
-rw-r--r--lib/CodeGen/Splitter.h101
-rw-r--r--lib/CodeGen/StackProtector.cpp7
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp358
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp18
-rw-r--r--lib/CodeGen/TailDuplication.cpp39
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp (renamed from lib/Target/TargetFrameLowering.cpp)2
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp106
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp123
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp52
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp525
-rw-r--r--lib/CodeGen/VirtRegMap.cpp168
-rw-r--r--lib/CodeGen/VirtRegMap.h335
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp2633
-rw-r--r--lib/CodeGen/VirtRegRewriter.h32
-rw-r--r--lib/DebugInfo/CMakeLists.txt4
-rw-r--r--lib/DebugInfo/DWARFContext.cpp2
-rw-r--r--lib/DebugInfo/DWARFContext.h1
-rw-r--r--lib/DebugInfo/DWARFDebugAbbrev.cpp2
-rw-r--r--lib/DebugInfo/DWARFDebugAbbrev.h6
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.cpp5
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp7
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.cpp4
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.h4
-rw-r--r--lib/DebugInfo/DWARFDebugLine.cpp7
-rw-r--r--lib/DebugInfo/DWARFFormValue.cpp20
-rw-r--r--lib/DebugInfo/LLVMBuild.txt22
-rw-r--r--lib/ExecutionEngine/CMakeLists.txt17
-rw-r--r--lib/ExecutionEngine/EventListenerCommon.h67
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp110
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp2
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt11
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp183
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt23
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/Makefile17
-rw-r--r--lib/ExecutionEngine/Interpreter/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp29
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp31
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h7
-rw-r--r--lib/ExecutionEngine/Interpreter/LLVMBuild.txt22
-rw-r--r--lib/ExecutionEngine/JIT/CMakeLists.txt11
-rw-r--r--lib/ExecutionEngine/JIT/Intercept.cpp162
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp45
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h11
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp211
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.h116
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp43
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp194
-rw-r--r--lib/ExecutionEngine/JIT/LLVMBuild.txt22
-rw-r--r--lib/ExecutionEngine/LLVMBuild.txt25
-rw-r--r--lib/ExecutionEngine/MCJIT/CMakeLists.txt10
-rw-r--r--lib/ExecutionEngine/MCJIT/Intercept.cpp162
-rw-r--r--lib/ExecutionEngine/MCJIT/LLVMBuild.txt22
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp41
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h17
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp14
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h47
-rw-r--r--lib/ExecutionEngine/Makefile13
-rw-r--r--lib/ExecutionEngine/OProfileJIT/CMakeLists.txt7
-rw-r--r--lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt23
-rw-r--r--lib/ExecutionEngine/OProfileJIT/Makefile18
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp (renamed from lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp)99
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp263
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt22
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp446
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp262
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h62
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h236
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp577
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h70
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp41
-rw-r--r--lib/LLVMBuild.txt24
-rw-r--r--lib/Linker/CMakeLists.txt8
-rw-r--r--lib/Linker/LLVMBuild.txt22
-rw-r--r--lib/Linker/LinkArchives.cpp5
-rw-r--r--lib/Linker/LinkModules.cpp513
-rw-r--r--lib/Linker/Linker.cpp1
-rw-r--r--lib/MC/CMakeLists.txt6
-rw-r--r--lib/MC/ELFObjectWriter.cpp935
-rw-r--r--lib/MC/ELFObjectWriter.h446
-rw-r--r--lib/MC/LLVMBuild.txt25
-rw-r--r--lib/MC/MCAsmBackend.cpp21
-rw-r--r--lib/MC/MCAsmInfo.cpp4
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp15
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp7
-rw-r--r--lib/MC/MCAsmStreamer.cpp106
-rw-r--r--lib/MC/MCAssembler.cpp124
-rw-r--r--lib/MC/MCCodeGenInfo.cpp4
-rw-r--r--lib/MC/MCContext.cpp60
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt24
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp18
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h10
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.cpp110
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.h23
-rw-r--r--lib/MC/MCDisassembler/EDMain.cpp280
-rw-r--r--lib/MC/MCDisassembler/EDOperand.cpp20
-rw-r--r--lib/MC/MCDisassembler/LLVMBuild.txt22
-rw-r--r--lib/MC/MCDwarf.cpp459
-rw-r--r--lib/MC/MCELF.cpp4
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp29
-rw-r--r--lib/MC/MCELFStreamer.cpp158
-rw-r--r--lib/MC/MCELFStreamer.h141
-rw-r--r--lib/MC/MCExpr.cpp42
-rw-r--r--lib/MC/MCInst.cpp2
-rw-r--r--lib/MC/MCInstPrinter.cpp6
-rw-r--r--lib/MC/MCLoggingStreamer.cpp250
-rw-r--r--lib/MC/MCMachOStreamer.cpp27
-rw-r--r--lib/MC/MCModule.cpp2
-rw-r--r--lib/MC/MCNullStreamer.cpp14
-rw-r--r--lib/MC/MCObjectFileInfo.cpp101
-rw-r--r--lib/MC/MCObjectStreamer.cpp52
-rw-r--r--lib/MC/MCObjectWriter.cpp14
-rw-r--r--lib/MC/MCParser/AsmParser.cpp277
-rw-r--r--lib/MC/MCParser/CMakeLists.txt5
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp17
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp1
-rw-r--r--lib/MC/MCParser/LLVMBuild.txt22
-rw-r--r--lib/MC/MCParser/MCAsmLexer.cpp4
-rw-r--r--lib/MC/MCParser/MCAsmParser.cpp4
-rw-r--r--lib/MC/MCPureStreamer.cpp15
-rw-r--r--lib/MC/MCStreamer.cpp109
-rw-r--r--lib/MC/MCSymbol.cpp7
-rw-r--r--lib/MC/MachObjectWriter.cpp15
-rw-r--r--lib/MC/SubtargetFeature.cpp24
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp89
-rw-r--r--lib/MC/WinCOFFStreamer.cpp20
-rw-r--r--lib/Object/Archive.cpp142
-rw-r--r--lib/Object/CMakeLists.txt5
-rw-r--r--lib/Object/COFFObjectFile.cpp295
-rw-r--r--lib/Object/ELFObjectFile.cpp1411
-rw-r--r--lib/Object/LLVMBuild.txt22
-rw-r--r--lib/Object/MachOObject.cpp35
-rw-r--r--lib/Object/MachOObjectFile.cpp694
-rw-r--r--lib/Object/Object.cpp150
-rw-r--r--lib/Object/ObjectFile.cpp4
-rw-r--r--lib/Support/APFloat.cpp123
-rw-r--r--lib/Support/APInt.cpp219
-rw-r--r--lib/Support/Allocator.cpp4
-rw-r--r--lib/Support/Atomic.cpp2
-rw-r--r--lib/Support/BlockFrequency.cpp12
-rw-r--r--lib/Support/BranchProbability.cpp13
-rw-r--r--lib/Support/CMakeLists.txt7
-rw-r--r--lib/Support/CommandLine.cpp26
-rw-r--r--lib/Support/ConstantRange.cpp100
-rw-r--r--lib/Support/CrashRecoveryContext.cpp1
-rw-r--r--lib/Support/DAGDeltaAlgorithm.cpp3
-rw-r--r--lib/Support/DataExtractor.cpp2
-rw-r--r--lib/Support/DataStream.cpp98
-rw-r--r--lib/Support/Dwarf.cpp2
-rw-r--r--lib/Support/FileUtilities.cpp1
-rw-r--r--lib/Support/FoldingSet.cpp26
-rw-r--r--lib/Support/GraphWriter.cpp77
-rw-r--r--lib/Support/Hashing.cpp29
-rw-r--r--lib/Support/Host.cpp13
-rw-r--r--lib/Support/IntrusiveRefCntPtr.cpp14
-rw-r--r--lib/Support/JSONParser.cpp302
-rw-r--r--lib/Support/LLVMBuild.txt21
-rw-r--r--lib/Support/LockFileManager.cpp216
-rw-r--r--lib/Support/ManagedStatic.cpp8
-rw-r--r--lib/Support/MemoryBuffer.cpp38
-rw-r--r--lib/Support/Mutex.cpp119
-rw-r--r--lib/Support/Path.cpp36
-rw-r--r--lib/Support/PathV2.cpp179
-rw-r--r--lib/Support/Program.cpp1
-rw-r--r--lib/Support/RWMutex.cpp104
-rw-r--r--lib/Support/SmallPtrSet.cpp51
-rw-r--r--lib/Support/SourceMgr.cpp141
-rw-r--r--lib/Support/Statistic.cpp16
-rw-r--r--lib/Support/StreamableMemoryObject.cpp140
-rw-r--r--lib/Support/StringExtras.cpp21
-rw-r--r--lib/Support/StringMap.cpp61
-rw-r--r--lib/Support/StringRef.cpp168
-rw-r--r--lib/Support/TargetRegistry.cpp2
-rw-r--r--lib/Support/ThreadLocal.cpp2
-rw-r--r--lib/Support/Threading.cpp8
-rw-r--r--lib/Support/Timer.cpp16
-rw-r--r--lib/Support/Triple.cpp642
-rw-r--r--lib/Support/Unix/Host.inc13
-rw-r--r--lib/Support/Unix/Path.inc16
-rw-r--r--lib/Support/Unix/PathV2.inc85
-rw-r--r--lib/Support/Unix/Process.inc8
-rw-r--r--lib/Support/Unix/Program.inc12
-rw-r--r--lib/Support/Unix/Signals.inc20
-rw-r--r--lib/Support/Valgrind.cpp13
-rw-r--r--lib/Support/Windows/Host.inc5
-rw-r--r--lib/Support/Windows/Path.inc35
-rw-r--r--lib/Support/Windows/PathV2.inc170
-rw-r--r--lib/Support/Windows/Process.inc4
-rw-r--r--lib/Support/Windows/Program.inc36
-rw-r--r--lib/Support/Windows/Signals.inc4
-rw-r--r--lib/Support/Windows/Windows.h106
-rw-r--r--lib/Support/YAMLParser.cpp2117
-rw-r--r--lib/Support/raw_ostream.cpp1
-rw-r--r--lib/TableGen/CMakeLists.txt5
-rw-r--r--lib/TableGen/Error.cpp4
-rw-r--r--lib/TableGen/LLVMBuild.txt22
-rw-r--r--lib/TableGen/Record.cpp173
-rw-r--r--lib/TableGen/TGLexer.cpp57
-rw-r--r--lib/TableGen/TGLexer.h6
-rw-r--r--lib/TableGen/TGParser.cpp554
-rw-r--r--lib/TableGen/TGParser.h73
-rw-r--r--lib/TableGen/TableGenAction.cpp15
-rw-r--r--lib/TableGen/TableGenBackend.cpp4
-rw-r--r--lib/Target/ARM/ARM.h4
-rw-r--r--lib/Target/ARM/ARM.td20
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp165
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h5
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp665
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h18
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp220
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h28
-rw-r--r--lib/Target/ARM/ARMBuildAttrs.h2
-rw-r--r--lib/Target/ARM/ARMCallingConv.h21
-rw-r--r--lib/Target/ARM/ARMCallingConv.td38
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp25
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1323
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp8
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h2
-rw-r--r--lib/Target/ARM/ARMELFWriterInfo.cpp33
-rw-r--r--lib/Target/ARM/ARMELFWriterInfo.h1
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp581
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp1317
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp454
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp28
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp284
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1150
-rw-r--r--lib/Target/ARM/ARMISelLowering.h29
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td112
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp19
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h8
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td753
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td3103
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td127
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td405
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td494
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMJITInfo.h2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp214
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp10
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h19
-rw-r--r--lib/Target/ARM/ARMPerfectShuffle.h2
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h6
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td188
-rw-r--r--lib/Target/ARM/ARMRelocations.h2
-rw-r--r--lib/Target/ARM/ARMSchedule.td10
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td19
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td36
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td12
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp10
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp12
-rw-r--r--lib/Target/ARM/ARMSubtarget.h18
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp108
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h24
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp34
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmLexer.cpp9
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp3562
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/CMakeLists.txt46
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp1359
-rw-r--r--lib/Target/ARM/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp192
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h34
-rw-r--r--lib/Target/ARM/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/LLVMBuild.txt35
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp180
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h36
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp283
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h24
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h10
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp76
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp24
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp116
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt9
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp10
-rw-r--r--lib/Target/ARM/README.txt21
-rw-r--r--lib/Target/ARM/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/ARM/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp19
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp22
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h8
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp35
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp21
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp31
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h8
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp141
-rw-r--r--lib/Target/Alpha/Alpha.h43
-rw-r--r--lib/Target/Alpha/Alpha.td68
-rw-r--r--lib/Target/Alpha/AlphaAsmPrinter.cpp166
-rw-r--r--lib/Target/Alpha/AlphaBranchSelector.cpp66
-rw-r--r--lib/Target/Alpha/AlphaCallingConv.td38
-rw-r--r--lib/Target/Alpha/AlphaFrameLowering.cpp143
-rw-r--r--lib/Target/Alpha/AlphaFrameLowering.h43
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp425
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp962
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.h142
-rw-r--r--lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.cpp382
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.h85
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td1159
-rw-r--r--lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--lib/Target/Alpha/AlphaMachineFunctionInfo.h62
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp199
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h56
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.td133
-rw-r--r--lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--lib/Target/Alpha/AlphaSchedule.td85
-rw-r--r--lib/Target/Alpha/AlphaSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/Alpha/AlphaSelectionDAGInfo.h31
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.cpp35
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.h49
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.cpp51
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.h66
-rw-r--r--lib/Target/Alpha/CMakeLists.txt38
-rw-r--r--lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp23
-rw-r--r--lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp78
-rw-r--r--lib/Target/Alpha/MCTargetDesc/CMakeLists.txt11
-rw-r--r--lib/Target/Alpha/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/Alpha/Makefile21
-rw-r--r--lib/Target/Alpha/README.txt42
-rw-r--r--lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp20
-rw-r--r--lib/Target/Alpha/TargetInfo/CMakeLists.txt13
-rw-r--r--lib/Target/Blackfin/Blackfin.h31
-rw-r--r--lib/Target/Blackfin/Blackfin.td202
-rw-r--r--lib/Target/Blackfin/BlackfinAsmPrinter.cpp156
-rw-r--r--lib/Target/Blackfin/BlackfinCallingConv.td30
-rw-r--r--lib/Target/Blackfin/BlackfinFrameLowering.cpp130
-rw-r--r--lib/Target/Blackfin/BlackfinFrameLowering.h47
-rw-r--r--lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp180
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.cpp645
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.h83
-rw-r--r--lib/Target/Blackfin/BlackfinInstrFormats.td34
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.cpp256
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.h81
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.td862
-rw-r--r--lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp104
-rw-r--r--lib/Target/Blackfin/BlackfinIntrinsicInfo.h32
-rw-r--r--lib/Target/Blackfin/BlackfinIntrinsics.td34
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.cpp344
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.h77
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.td277
-rw-r--r--lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp24
-rw-r--r--lib/Target/Blackfin/BlackfinSelectionDAGInfo.h31
-rw-r--r--lib/Target/Blackfin/BlackfinSubtarget.cpp44
-rw-r--r--lib/Target/Blackfin/BlackfinSubtarget.h49
-rw-r--r--lib/Target/Blackfin/BlackfinTargetMachine.cpp43
-rw-r--r--lib/Target/Blackfin/BlackfinTargetMachine.h68
-rw-r--r--lib/Target/Blackfin/CMakeLists.txt38
-rw-r--r--lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp22
-rw-r--r--lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h29
-rw-r--r--lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp81
-rw-r--r--lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h38
-rw-r--r--lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt11
-rw-r--r--lib/Target/Blackfin/Makefile23
-rw-r--r--lib/Target/Blackfin/README.txt244
-rw-r--r--lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp21
-rw-r--r--lib/Target/Blackfin/TargetInfo/CMakeLists.txt13
-rw-r--r--lib/Target/CBackend/CBackend.cpp3617
-rw-r--r--lib/Target/CBackend/CMakeLists.txt17
-rw-r--r--lib/Target/CBackend/CTargetMachine.h42
-rw-r--r--lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp21
-rw-r--r--lib/Target/CBackend/TargetInfo/CMakeLists.txt11
-rw-r--r--lib/Target/CBackend/TargetInfo/Makefile15
-rw-r--r--lib/Target/CMakeLists.txt46
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt27
-rw-r--r--lib/Target/CellSPU/CellSDKIntrinsics.td2
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt32
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp2
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h4
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp8
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h2
-rw-r--r--lib/Target/CellSPU/SPU.h2
-rw-r--r--lib/Target/CellSPU/SPU.td4
-rw-r--r--lib/Target/CellSPU/SPU128InstrInfo.td4
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td2
-rw-r--r--lib/Target/CellSPU/SPUAsmPrinter.cpp3
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td4
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp5
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.h2
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp18
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp142
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h5
-rw-r--r--lib/Target/CellSPU/SPUInstrBuilder.h2
-rw-r--r--lib/Target/CellSPU/SPUInstrFormats.td6
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp2
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h4
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.cpp14
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h3
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td2
-rw-r--r--lib/Target/CellSPU/SPUNodes.td2
-rw-r--r--lib/Target/CellSPU/SPUNopFiller.cpp2
-rw-r--r--lib/Target/CellSPU/SPUOperands.td6
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp8
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h4
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td6
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td6
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.cpp3
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp41
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h20
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt9
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp124
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h8
-rw-r--r--lib/Target/CppBackend/LLVMBuild.txt31
-rw-r--r--lib/Target/CppBackend/Makefile2
-rw-r--r--lib/Target/CppBackend/TargetInfo/CMakeLists.txt5
-rw-r--r--lib/Target/CppBackend/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt37
-rw-r--r--lib/Target/Hexagon/Hexagon.h74
-rw-r--r--lib/Target/Hexagon/Hexagon.td72
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp313
-rwxr-xr-xlib/Target/Hexagon/HexagonAsmPrinter.h165
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp235
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp207
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h189
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp177
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp332
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h50
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp644
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1485
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp1496
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h162
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td508
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td308
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td67
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp2732
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h185
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td3052
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td137
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td5746
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td3462
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsDerived.td29
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV3.td50
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV4.td369
-rw-r--r--lib/Target/Hexagon/HexagonMCInst.h41
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp93
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h75
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp288
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp315
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h90
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td167
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp82
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td54
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td59
-rw-r--r--lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp46
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h40
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp129
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp62
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h74
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp145
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h83
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp94
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h40
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp3642
-rw-r--r--lib/Target/Hexagon/HexagonVarargsCallingConvention.h141
-rw-r--r--lib/Target/Hexagon/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp198
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h75
-rw-r--r--lib/Target/Hexagon/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/InstPrinter/Makefile (renamed from lib/Target/Blackfin/TargetInfo/Makefile)6
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt32
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h70
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp36
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h (renamed from lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h)13
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp95
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h (renamed from lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h)21
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/Makefile (renamed from lib/Target/SystemZ/MCTargetDesc/Makefile)4
-rw-r--r--lib/Target/Hexagon/Makefile23
-rw-r--r--lib/Target/Hexagon/TargetInfo/CMakeLists.txt8
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp (renamed from lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp)10
-rw-r--r--lib/Target/Hexagon/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/TargetInfo/Makefile (renamed from lib/Target/Alpha/TargetInfo/Makefile)4
-rw-r--r--lib/Target/LLVMBuild.txt56
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp10
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp3
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt34
-rw-r--r--lib/Target/MBlaze/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/MBlaze/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp31
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h6
-rw-r--r--lib/Target/MBlaze/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MBlaze/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h8
-rw-r--r--lib/Target/MBlaze/LLVMBuild.txt34
-rw-r--r--lib/Target/MBlaze/MBlaze.td2
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp8
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp22
-rw-r--r--lib/Target/MBlaze/MBlazeELFWriterInfo.cpp12
-rw-r--r--lib/Target/MBlaze/MBlazeELFWriterInfo.h1
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp39
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.h3
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp24
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.h6
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFPU.td2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFSL.td2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td26
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.h4
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td171
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp16
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.h2
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsics.td2
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp11
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.h3
-rw-r--r--lib/Target/MBlaze/MBlazeMachineFunction.cpp14
-rw-r--r--lib/Target/MBlaze/MBlazeMachineFunction.h5
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp10
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h4
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.td2
-rw-r--r--lib/Target/MBlaze/MBlazeRelocations.h2
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule.td2
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule3.td2
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule5.td2
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.h2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp51
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h9
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp53
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h5
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp77
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp2
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h4
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp19
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp13
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h8
-rw-r--r--lib/Target/MBlaze/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MBlaze/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/MSP430/CMakeLists.txt26
-rw-r--r--lib/Target/MSP430/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MSP430/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp1
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h7
-rw-r--r--lib/Target/MSP430/LLVMBuild.txt32
-rw-r--r--lib/Target/MSP430/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp3
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp11
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h2
-rw-r--r--lib/Target/MSP430/MSP430.td2
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp6
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp40
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430InstrFormats.td2
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp32
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td2
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp11
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.h3
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.cpp14
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h2
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp14
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h11
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td2
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp4
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h6
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp35
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h10
-rw-r--r--lib/Target/MSP430/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MSP430/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Mangler.cpp11
-rw-r--r--lib/Target/Mips/AsmParser/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/Mips/AsmParser/Makefile (renamed from lib/Target/Blackfin/MCTargetDesc/Makefile)7
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp66
-rw-r--r--lib/Target/Mips/CMakeLists.txt32
-rw-r--r--lib/Target/Mips/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/Mips/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/Mips/InstPrinter/Makefile2
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp70
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h16
-rw-r--r--lib/Target/Mips/LLVMBuild.txt34
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/Mips/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp260
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h126
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp249
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h122
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp256
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp45
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h25
-rw-r--r--lib/Target/Mips/Makefile4
-rw-r--r--lib/Target/Mips/Mips.h2
-rw-r--r--lib/Target/Mips/Mips.td10
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td237
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.cpp153
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.h63
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp230
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h20
-rw-r--r--lib/Target/Mips/MipsCallingConv.td56
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp223
-rw-r--r--lib/Target/Mips/MipsCondMov.td194
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp33
-rw-r--r--lib/Target/Mips/MipsEmitGPRestore.cpp13
-rw-r--r--lib/Target/Mips/MipsExpandPseudo.cpp20
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp204
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp460
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1297
-rw-r--r--lib/Target/Mips/MipsISelLowering.h25
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td268
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td41
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp107
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h91
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td654
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp17
-rw-r--r--lib/Target/Mips/MipsJITInfo.h4
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp337
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h20
-rw-r--r--lib/Target/Mips/MipsMCSymbolRefExpr.cpp70
-rw-r--r--lib/Target/Mips/MipsMCSymbolRefExpr.h67
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp50
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h26
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp256
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h7
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td136
-rw-r--r--lib/Target/Mips/MipsRelocations.h10
-rw-r--r--lib/Target/Mips/MipsSchedule.td2
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp22
-rw-r--r--lib/Target/Mips/MipsSubtarget.h8
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp120
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h42
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp2
-rw-r--r--lib/Target/Mips/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/CMakeLists.txt24
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PTX/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp77
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.h8
-rw-r--r--lib/Target/PTX/LLVMBuild.txt32
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h71
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp2
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h6
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp11
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h2
-rw-r--r--lib/Target/PTX/PTX.h1
-rw-r--r--lib/Target/PTX/PTX.td2
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp344
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.h4
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp6
-rw-r--r--lib/Target/PTX/PTXFrameLowering.cpp2
-rw-r--r--lib/Target/PTX/PTXFrameLowering.h2
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp168
-rw-r--r--lib/Target/PTX/PTXISelLowering.h9
-rw-r--r--lib/Target/PTX/PTXInstrFormats.td2
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp25
-rw-r--r--lib/Target/PTX/PTXInstrInfo.h2
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td46
-rw-r--r--lib/Target/PTX/PTXInstrLoadStore.td2
-rw-r--r--lib/Target/PTX/PTXIntrinsicInstrInfo.td2
-rw-r--r--lib/Target/PTX/PTXMCAsmStreamer.cpp30
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp23
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h158
-rw-r--r--lib/Target/PTX/PTXParamManager.cpp4
-rw-r--r--lib/Target/PTX/PTXParamManager.h3
-rw-r--r--lib/Target/PTX/PTXRegAlloc.cpp7
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.cpp46
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.h7
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.td3
-rw-r--r--lib/Target/PTX/PTXSelectionDAGInfo.cpp5
-rw-r--r--lib/Target/PTX/PTXSubtarget.cpp6
-rw-r--r--lib/Target/PTX/PTXSubtarget.h3
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp338
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h35
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt31
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp9
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h10
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt33
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp103
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp12
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h10
-rw-r--r--lib/Target/PowerPC/PPC.h5
-rw-r--r--lib/Target/PowerPC/PPC.td16
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp52
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td35
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp38
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp189
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h28
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp12
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp252
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h36
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td102
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td54
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td63
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp129
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h13
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td97
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp15
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h3
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h2
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp282
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h10
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td6
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h6
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td71
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td616
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td652
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td9
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp24
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h12
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp75
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h33
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/README.txt40
-rw-r--r--lib/Target/Sparc/CMakeLists.txt25
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp18
-rw-r--r--lib/Target/Sparc/FPMover.cpp8
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt32
-rw-r--r--lib/Target/Sparc/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h2
-rw-r--r--lib/Target/Sparc/Sparc.h5
-rw-r--r--lib/Target/Sparc/Sparc.td6
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp22
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td6
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp2
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp1
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp50
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h8
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td6
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp19
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h11
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td6
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h1
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp12
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td7
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp4
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h3
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp65
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h18
-rw-r--r--lib/Target/Sparc/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt36
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt14
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp32
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h30
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp81
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h38
-rw-r--r--lib/Target/SystemZ/Makefile22
-rw-r--r--lib/Target/SystemZ/SystemZ.h52
-rw-r--r--lib/Target/SystemZ/SystemZ.td61
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp221
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td46
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp779
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp868
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h145
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h128
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td340
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td133
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp439
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h113
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td1147
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h51
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td325
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp143
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h60
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td205
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h31
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp54
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp40
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h68
-rw-r--r--lib/Target/SystemZ/TargetInfo/CMakeLists.txt13
-rw-r--r--lib/Target/SystemZ/TargetInfo/Makefile15
-rw-r--r--lib/Target/TargetData.cpp123
-rw-r--r--lib/Target/TargetInstrInfo.cpp42
-rw-r--r--lib/Target/TargetJITInfo.cpp14
-rw-r--r--lib/Target/TargetLibraryInfo.cpp112
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp38
-rw-r--r--lib/Target/TargetMachine.cpp228
-rw-r--r--lib/Target/TargetMachineC.cpp197
-rw-r--r--lib/Target/TargetRegisterInfo.cpp4
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/AsmParser/X86AsmLexer.cpp7
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp730
-rw-r--r--lib/Target/X86/CMakeLists.txt36
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp235
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h46
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c80
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h15
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h23
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt6
-rw-r--r--lib/Target/X86/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp78
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h10
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp329
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.h2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp72
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h9
-rw-r--r--lib/Target/X86/LLVMBuild.txt35
-rw-r--r--lib/Target/X86/MCTargetDesc/CMakeLists.txt12
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp80
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h111
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp224
-rw-r--r--lib/Target/X86/MCTargetDesc/X86FixupKinds.h2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp25
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h22
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp377
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp105
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp65
-rw-r--r--lib/Target/X86/README-SSE.txt20
-rw-r--r--lib/Target/X86/README.txt42
-rw-r--r--lib/Target/X86/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/X86/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp193
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h84
-rw-r--r--lib/Target/X86/X86.h2
-rw-r--r--lib/Target/X86/X86.td114
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp48
-rw-r--r--lib/Target/X86/X86AsmPrinter.h5
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.cpp2
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.h4
-rw-r--r--lib/Target/X86/X86CallingConv.td58
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp18
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp8
-rw-r--r--lib/Target/X86/X86FastISel.cpp75
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp33
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp382
-rw-r--r--lib/Target/X86/X86FrameLowering.h2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp548
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp5371
-rw-r--r--lib/Target/X86/X86ISelLowering.h202
-rw-r--r--lib/Target/X86/X86Instr3DNow.td2
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td319
-rw-r--r--lib/Target/X86/X86InstrBuilder.h1
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td28
-rw-r--r--lib/Target/X86/X86InstrCompiler.td290
-rw-r--r--lib/Target/X86/X86InstrControl.td196
-rw-r--r--lib/Target/X86/X86InstrExtension.td84
-rw-r--r--lib/Target/X86/X86InstrFMA.td204
-rw-r--r--lib/Target/X86/X86InstrFPStack.td63
-rw-r--r--lib/Target/X86/X86InstrFormats.td340
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td215
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp441
-rw-r--r--lib/Target/X86/X86InstrInfo.h9
-rw-r--r--lib/Target/X86/X86InstrInfo.td223
-rw-r--r--lib/Target/X86/X86InstrMMX.td30
-rw-r--r--lib/Target/X86/X86InstrSSE.td6021
-rw-r--r--lib/Target/X86/X86InstrSVM.td62
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td548
-rw-r--r--lib/Target/X86/X86InstrSystem.td72
-rw-r--r--lib/Target/X86/X86InstrVMX.td38
-rw-r--r--lib/Target/X86/X86InstrXOP.td307
-rw-r--r--lib/Target/X86/X86JITInfo.cpp8
-rw-r--r--lib/Target/X86/X86JITInfo.h2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp45
-rw-r--r--lib/Target/X86/X86MCInstLower.h2
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.cpp14
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp216
-rw-r--r--lib/Target/X86/X86RegisterInfo.h5
-rw-r--r--lib/Target/X86/X86RegisterInfo.td2
-rw-r--r--lib/Target/X86/X86Relocations.h2
-rw-r--r--lib/Target/X86/X86Schedule.td273
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td305
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp3
-rw-r--r--lib/Target/X86/X86Subtarget.cpp131
-rw-r--r--lib/Target/X86/X86Subtarget.h82
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp77
-rw-r--r--lib/Target/X86/X86TargetMachine.h44
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp3
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h3
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp234
-rw-r--r--lib/Target/XCore/CMakeLists.txt25
-rw-r--r--lib/Target/XCore/LLVMBuild.txt32
-rw-r--r--lib/Target/XCore/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp3
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h5
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp8
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h2
-rw-r--r--lib/Target/XCore/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/XCore/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/XCore.h3
-rw-r--r--lib/Target/XCore/XCore.td3
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp11
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h2
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp13
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp69
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h8
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td2
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td2
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h4
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp24
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h13
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp4
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h3
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp31
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h11
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.h2
-rw-r--r--lib/Transforms/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt10
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp14
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp2
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp227
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp715
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp102
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp58
-rw-r--r--lib/Transforms/IPO/Inliner.cpp149
-rw-r--r--lib/Transforms/IPO/Internalize.cpp7
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt23
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp27
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp3
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h10
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp74
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp243
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp160
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp44
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp89
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp71
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp58
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp39
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp83
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp97
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp375
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h4
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp85
-rw-r--r--lib/Transforms/InstCombine/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp937
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt10
-rw-r--r--lib/Transforms/Instrumentation/FunctionBlackList.cpp79
-rw-r--r--lib/Transforms/Instrumentation/FunctionBlackList.h37
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp139
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp4
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp13
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp311
-rw-r--r--lib/Transforms/LLVMBuild.txt24
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt10
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp47
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp92
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp289
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp146
-rw-r--r--lib/Transforms/Scalar/GVN.cpp339
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp (renamed from lib/Target/ARM/ARMGlobalMerge.cpp)39
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp553
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp35
-rw-r--r--lib/Transforms/Scalar/LICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt23
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp166
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp985
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp58
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp467
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp36
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp1228
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp10
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp500
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp70
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp182
-rw-r--r--lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp16
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp182
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp34
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt12
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp208
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp96
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp7
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp57
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp609
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Utils/Local.cpp109
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp61
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp40
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp372
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp22
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp28
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp10
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp64
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp9
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp7
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp619
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp45
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp20
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp1907
-rw-r--r--lib/Transforms/Vectorize/CMakeLists.txt4
-rw-r--r--lib/Transforms/Vectorize/LLVMBuild.txt24
-rw-r--r--lib/Transforms/Vectorize/Makefile (renamed from lib/Target/CBackend/Makefile)7
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp39
-rw-r--r--lib/VMCore/AsmWriter.cpp135
-rw-r--r--lib/VMCore/Attributes.cpp8
-rw-r--r--lib/VMCore/AutoUpgrade.cpp598
-rw-r--r--lib/VMCore/BasicBlock.cpp3
-rw-r--r--lib/VMCore/CMakeLists.txt3
-rw-r--r--lib/VMCore/ConstantFold.cpp546
-rw-r--r--lib/VMCore/Constants.cpp1223
-rw-r--r--lib/VMCore/ConstantsContext.h234
-rw-r--r--lib/VMCore/Core.cpp73
-rw-r--r--lib/VMCore/DebugInfoProbe.cpp225
-rw-r--r--lib/VMCore/DebugLoc.cpp5
-rw-r--r--lib/VMCore/Dominators.cpp202
-rw-r--r--lib/VMCore/Function.cpp52
-rw-r--r--lib/VMCore/GCOV.cpp2
-rw-r--r--lib/VMCore/IRBuilder.cpp4
-rw-r--r--lib/VMCore/Instruction.cpp56
-rw-r--r--lib/VMCore/Instructions.cpp257
-rw-r--r--lib/VMCore/LLVMBuild.txt22
-rw-r--r--lib/VMCore/LLVMContext.cpp18
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp51
-rw-r--r--lib/VMCore/LLVMContextImpl.h162
-rw-r--r--lib/VMCore/Metadata.cpp89
-rw-r--r--lib/VMCore/Module.cpp56
-rw-r--r--lib/VMCore/Pass.cpp24
-rw-r--r--lib/VMCore/PassManager.cpp161
-rw-r--r--lib/VMCore/Type.cpp185
-rw-r--r--lib/VMCore/Use.cpp1
-rw-r--r--lib/VMCore/User.cpp2
-rw-r--r--lib/VMCore/Value.cpp127
-rw-r--r--lib/VMCore/ValueTypes.cpp12
-rw-r--r--lib/VMCore/Verifier.cpp284
1307 files changed, 111145 insertions, 70971 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index bd132c05c327..95c834b451c9 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -440,3 +440,19 @@ bool llvm::isIdentifiedObject(const Value *V) {
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+bool llvm::isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
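
For illustration (not part of the patch): with isKnownNonNull now exported, a caller could fold a null check against a provably non-null pointer. A minimal sketch, assuming the declaration sits next to isIdentifiedObject in llvm/Analysis/AliasAnalysis.h; simplifyNullCheck is a hypothetical helper, not an LLVM API:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Fold "icmp eq/ne %p, null" when %p is an alloca, a byval argument, or a
// non-extern-weak global -- exactly the cases isKnownNonNull accepts.
static Value *simplifyNullCheck(ICmpInst *ICI) {
  if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1)))
    return 0;
  if (!isKnownNonNull(ICI->getOperand(0)))
    return 0;
  // eq folds to false, ne folds to true.
  return ConstantInt::get(ICI->getType(),
                          ICI->getPredicate() == ICmpInst::ICMP_NE);
}
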
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index d947220e078d..9f219f563739 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -127,9 +127,8 @@ AliasAnalysis::AliasResult
AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
- const char *AliasString;
+ const char *AliasString = 0;
switch (R) {
- default: llvm_unreachable("Unknown alias type!");
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
case PartialAlias: Partial++; AliasString = "Partial alias"; break;
@@ -154,9 +153,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
const Location &Loc) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
- const char *MRString;
+ const char *MRString = 0;
switch (R) {
- default: llvm_unreachable("Unknown mod/ref type!");
case NoModRef: NoMR++; MRString = "NoModRef"; break;
case Ref: JustRef++; MRString = "JustRef"; break;
case Mod: JustMod++; MRString = "JustMod"; break;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 37271b94a201..ac72983a8d7b 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -193,8 +193,6 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::MustAlias:
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
++MustAlias; break;
- default:
- errs() << "Unknown alias query result!\n";
}
}
}
@@ -223,8 +221,6 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
- default:
- errs() << "Unknown alias query result!\n";
}
}
}
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 3fcd3b55de57..f80e2fba8010 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -189,7 +189,9 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) !=
+ if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
+ I.getSize(),
+ I.getTBAAInfo())) !=
AliasAnalysis::NoModRef)
return true;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index af400ba7e70e..20ecfd26a986 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -42,22 +42,6 @@ using namespace llvm;
// Useful predicates
//===----------------------------------------------------------------------===//
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
-static bool isKnownNonNull(const Value *V) {
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V)) return true;
-
- // A byval argument is never null.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
-
- // Global values are not null unless extern weak.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage();
- return false;
-}
-
/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
/// object that never escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
@@ -100,42 +84,59 @@ static bool isEscapeSource(const Value *V) {
/// getObjectSize - Return the size of the object specified by V, or
/// UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
+static uint64_t getObjectSize(const Value *V, const TargetData &TD,
+ bool RoundToAlign = false) {
Type *AccessTy;
+ unsigned Align;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (!GV->hasDefinitiveInitializer())
return AliasAnalysis::UnknownSize;
AccessTy = GV->getType()->getElementType();
+ Align = GV->getAlignment();
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
if (!AI->isArrayAllocation())
AccessTy = AI->getType()->getElementType();
else
return AliasAnalysis::UnknownSize;
+ Align = AI->getAlignment();
} else if (const CallInst* CI = extractMallocCall(V)) {
- if (!isArrayMalloc(V, &TD))
+ if (!RoundToAlign && !isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
return C->getZExtValue();
return AliasAnalysis::UnknownSize;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
- if (A->hasByValAttr())
+ if (A->hasByValAttr()) {
AccessTy = cast<PointerType>(A->getType())->getElementType();
- else
+ Align = A->getParamAlignment();
+ } else {
return AliasAnalysis::UnknownSize;
+ }
} else {
return AliasAnalysis::UnknownSize;
}
-
- if (AccessTy->isSized())
- return TD.getTypeAllocSize(AccessTy);
- return AliasAnalysis::UnknownSize;
+
+ if (!AccessTy->isSized())
+ return AliasAnalysis::UnknownSize;
+
+ uint64_t Size = TD.getTypeAllocSize(AccessTy);
+ // If there is an explicitly specified alignment, and we need to
+ // take alignment into account, round up the size. (If the alignment
+ // is implicit, getTypeAllocSize is sufficient.)
+ if (RoundToAlign && Align)
+ Size = RoundUpToAlignment(Size, Align);
+
+ return Size;
}
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const TargetData &TD) {
- uint64_t ObjectSize = getObjectSize(V, TD);
+ // This function needs to use the aligned object size because we allow
+ // reads a bit past the end given sufficient alignment.
+ uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true);
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
@@ -706,8 +707,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
- !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -978,10 +978,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
//
// TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
// practical effect of this is protecting TBAA in the case of dynamic
- // indices into arrays of unions. An alternative way to solve this would
- // be to have clang emit extra metadata for unions and/or union accesses.
- // A union-specific solution wouldn't handle the problem for malloc'd
- // memory however.
+ // indices into arrays of unions or malloc'd memory.
return PartialAlias;
}
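
A worked example (editorial) of the new RoundToAlign mode: an object whose type occupies 10 bytes but whose definition carries "align 16" is now treated as 16 bytes for isObjectSmallerThan, so a 16-byte access is no longer judged to run past the object. A standalone sketch of the rounding, assuming the usual RoundUpToAlignment formula from MathExtras.h:

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Align (Align comes from the
// global/alloca/byval alignment, as in getObjectSize above).
static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  assert(roundUpToAlignment(10, 16) == 16); // padded size used for alias queries
  assert(roundUpToAlignment(16, 16) == 16); // already aligned: unchanged
  return 0;
}
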
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index d16665fa55cf..8a660f737c9b 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -58,6 +58,6 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
/// that we should not rely on the value itself, but only on the comparison to
/// the other block frequencies. We do this to avoid using of floating points.
///
-BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const {
+BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index bde3b76708fa..2730ce6c63bf 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -12,11 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
+#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -29,121 +32,118 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
char BranchProbabilityInfo::ID = 0;
-namespace {
-// Please note that BranchProbabilityAnalysis is not a FunctionPass.
-// It is created by BranchProbabilityInfo (which is a FunctionPass), which
-// provides a clear interface. Thanks to that, all heuristics and other
-// private methods are hidden in the .cpp file.
-class BranchProbabilityAnalysis {
-
- typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
-
- DenseMap<Edge, uint32_t> *Weights;
-
- BranchProbabilityInfo *BP;
-
- LoopInfo *LI;
-
-
- // Weights are for internal use only. They are used by heuristics to help to
- // estimate edges' probability. Example:
- //
- // Using "Loop Branch Heuristics" we predict weights of edges for the
- // block BB2.
- // ...
- // |
- // V
- // BB1<-+
- // | |
- // | | (Weight = 124)
- // V |
- // BB2--+
- // |
- // | (Weight = 4)
- // V
- // BB3
- //
- // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
- // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
-
- static const uint32_t LBH_TAKEN_WEIGHT = 124;
- static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-
- static const uint32_t RH_TAKEN_WEIGHT = 24;
- static const uint32_t RH_NONTAKEN_WEIGHT = 8;
-
- static const uint32_t PH_TAKEN_WEIGHT = 20;
- static const uint32_t PH_NONTAKEN_WEIGHT = 12;
-
- static const uint32_t ZH_TAKEN_WEIGHT = 20;
- static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
-
- // Standard weight value. Used when none of the heuristics set weight for
- // the edge.
- static const uint32_t NORMAL_WEIGHT = 16;
-
- // Minimum weight of an edge. Please note, that weight is NEVER 0.
- static const uint32_t MIN_WEIGHT = 1;
-
- // Return TRUE if BB leads directly to a Return Instruction.
- static bool isReturningBlock(BasicBlock *BB) {
- SmallPtrSet<BasicBlock *, 8> Visited;
-
- while (true) {
- TerminatorInst *TI = BB->getTerminator();
- if (isa<ReturnInst>(TI))
- return true;
-
- if (TI->getNumSuccessors() > 1)
- break;
-
- // It is unreachable block which we can consider as a return instruction.
- if (TI->getNumSuccessors() == 0)
- return true;
-
- Visited.insert(BB);
- BB = TI->getSuccessor(0);
+// Weights are for internal use only. They are used by heuristics to help to
+// estimate edges' probability. Example:
+//
+// Using "Loop Branch Heuristics" we predict weights of edges for the
+// block BB2.
+// ...
+// |
+// V
+// BB1<-+
+// | |
+// | | (Weight = 124)
+// V |
+// BB2--+
+// |
+// | (Weight = 4)
+// V
+// BB3
+//
+// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
+static const uint32_t LBH_TAKEN_WEIGHT = 124;
+static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+/// \brief Unreachable-terminating branch taken weight.
+///
+/// This is the weight for a branch being taken to a block that terminates
+/// (eventually) in unreachable. These are predicted as unlikely as possible.
+static const uint32_t UR_TAKEN_WEIGHT = 1;
+
+/// \brief Unreachable-terminating branch not-taken weight.
+///
+/// This is the weight for a branch not being taken toward a block that
+/// terminates (eventually) in unreachable. Such a branch is essentially never
+/// taken. Set the weight to an absurdly high value so that nested loops don't
+/// easily subsume it.
+static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+
+static const uint32_t PH_TAKEN_WEIGHT = 20;
+static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t ZH_TAKEN_WEIGHT = 20;
+static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t FPH_TAKEN_WEIGHT = 20;
+static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+
+// Standard weight value. Used when none of the heuristics set weight for
+// the edge.
+static const uint32_t NORMAL_WEIGHT = 16;
+
+// Minimum weight of an edge. Please note, that weight is NEVER 0.
+static const uint32_t MIN_WEIGHT = 1;
+
+static uint32_t getMaxWeightFor(BasicBlock *BB) {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+}
- // Stop if cycle is detected.
- if (Visited.count(BB))
- return false;
- }
+/// \brief Calculate edge weights for successors that lead to unreachable.
+///
+/// Predict that a successor which leads necessarily to an
+/// unreachable-terminated block is extremely unlikely to be taken.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(TI))
+ PostDominatedByUnreachable.insert(BB);
return false;
}
- uint32_t getMaxWeightFor(BasicBlock *BB) const {
- return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
- }
+ SmallPtrSet<BasicBlock *, 4> UnreachableEdges;
+ SmallPtrSet<BasicBlock *, 4> ReachableEdges;
-public:
- BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
- BranchProbabilityInfo *BP, LoopInfo *LI)
- : Weights(W), BP(BP), LI(LI) {
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (PostDominatedByUnreachable.count(*I))
+ UnreachableEdges.insert(*I);
+ else
+ ReachableEdges.insert(*I);
}
- // Metadata Weights
- bool calcMetadataWeights(BasicBlock *BB);
+ // If all successors are in the set of blocks post-dominated by unreachable,
+ // this block is too.
+ if (UnreachableEdges.size() == TI->getNumSuccessors())
+ PostDominatedByUnreachable.insert(BB);
- // Return Heuristics
- bool calcReturnHeuristics(BasicBlock *BB);
-
- // Pointer Heuristics
- bool calcPointerHeuristics(BasicBlock *BB);
-
- // Loop Branch Heuristics
- bool calcLoopBranchHeuristics(BasicBlock *BB);
+ // Skip probabilities if this block has a single successor or if all were
+ // reachable.
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
+ return false;
- // Zero Heurestics
- bool calcZeroHeuristics(BasicBlock *BB);
+ uint32_t UnreachableWeight =
+ std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT);
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(),
+ E = UnreachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, UnreachableWeight);
+
+ if (ReachableEdges.empty())
+ return true;
+ uint32_t ReachableWeight =
+ std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT);
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(),
+ E = ReachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, ReachableWeight);
- bool runOnFunction(Function &F);
-};
-} // end anonymous namespace
+ return true;
+}
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
-bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
return false;
@@ -174,54 +174,14 @@ bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
+ setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
return true;
}
-// Calculate Edge Weights using "Return Heuristics". Predict a successor which
-// leads directly to Return Instruction will not be taken.
-bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
- if (BB->getTerminator()->getNumSuccessors() == 1)
- return false;
-
- SmallPtrSet<BasicBlock *, 4> ReturningEdges;
- SmallPtrSet<BasicBlock *, 4> StayEdges;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- BasicBlock *Succ = *I;
- if (isReturningBlock(Succ))
- ReturningEdges.insert(Succ);
- else
- StayEdges.insert(Succ);
- }
-
- if (uint32_t numStayEdges = StayEdges.size()) {
- uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges;
- if (stayWeight < NORMAL_WEIGHT)
- stayWeight = NORMAL_WEIGHT;
-
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(),
- E = StayEdges.end(); I != E; ++I)
- BP->setEdgeWeight(BB, *I, stayWeight);
- }
-
- if (uint32_t numRetEdges = ReturningEdges.size()) {
- uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges;
- if (retWeight < MIN_WEIGHT)
- retWeight = MIN_WEIGHT;
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(),
- E = ReturningEdges.end(); I != E; ++I) {
- BP->setEdgeWeight(BB, *I, retWeight);
- }
- }
-
- return ReturningEdges.size() > 0;
-}
-
// Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison
// between two pointers, or between a pointer and NULL, will fail.
-bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -249,16 +209,14 @@ bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(Taken, NonTaken);
- BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
- BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
+ setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
-bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
- uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
-
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
Loop *L = LI->getLoopFor(BB);
if (!L)
return false;
@@ -267,17 +225,13 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
SmallPtrSet<BasicBlock *, 8> ExitingEdges;
SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop.
- bool isHeader = BB == L->getHeader();
-
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- BasicBlock *Succ = *I;
- Loop *SuccL = LI->getLoopFor(Succ);
- if (SuccL != L)
- ExitingEdges.insert(Succ);
- else if (Succ == L->getHeader())
- BackEdges.insert(Succ);
- else if (isHeader)
- InEdges.insert(Succ);
+ if (!L->contains(*I))
+ ExitingEdges.insert(*I);
+ else if (L->getHeader() == *I)
+ BackEdges.insert(*I);
+ else
+ InEdges.insert(*I);
}
if (uint32_t numBackEdges = BackEdges.size()) {
@@ -288,7 +242,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
EE = BackEdges.end(); EI != EE; ++EI) {
BasicBlock *Back = *EI;
- BP->setEdgeWeight(BB, Back, backWeight);
+ setEdgeWeight(BB, Back, backWeight);
}
}
@@ -300,27 +254,26 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(),
EE = InEdges.end(); EI != EE; ++EI) {
BasicBlock *Back = *EI;
- BP->setEdgeWeight(BB, Back, inWeight);
+ setEdgeWeight(BB, Back, inWeight);
}
}
- uint32_t numExitingEdges = ExitingEdges.size();
- if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
- uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+ if (uint32_t numExitingEdges = ExitingEdges.size()) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges;
if (exitWeight < MIN_WEIGHT)
exitWeight = MIN_WEIGHT;
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
EE = ExitingEdges.end(); EI != EE; ++EI) {
BasicBlock *Exiting = *EI;
- BP->setEdgeWeight(BB, Exiting, exitWeight);
+ setEdgeWeight(BB, Exiting, exitWeight);
}
}
return true;
}
-bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -375,45 +328,94 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(Taken, NonTaken);
- BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
- BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
+ setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
return true;
}
+bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
-bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
-
- if (calcMetadataWeights(BB))
- continue;
+ Value *Cond = BI->getCondition();
+ FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
+ if (!FCmp)
+ return false;
- if (calcLoopBranchHeuristics(BB))
- continue;
+ bool isProb;
+ if (FCmp->isEquality()) {
+ // f1 == f2 -> Unlikely
+ // f1 != f2 -> Likely
+ isProb = !FCmp->isTrueWhenEqual();
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
+ // !isnan -> Likely
+ isProb = true;
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
+ // isnan -> Unlikely
+ isProb = false;
+ } else {
+ return false;
+ }
- if (calcReturnHeuristics(BB))
- continue;
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
- if (calcPointerHeuristics(BB))
- continue;
+ if (!isProb)
+ std::swap(Taken, NonTaken);
- calcZeroHeuristics(BB);
- }
+ setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT);
- return false;
+ return true;
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
}
bool BranchProbabilityInfo::runOnFunction(Function &F) {
- LoopInfo &LI = getAnalysis<LoopInfo>();
- BranchProbabilityAnalysis BPA(&Weights, this, &LI);
- return BPA.runOnFunction(F);
+ LastF = &F; // Store the last function we ran on for printing.
+ LI = &getAnalysis<LoopInfo>();
+ assert(PostDominatedByUnreachable.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
+ E = po_end(&F.getEntryBlock());
+ I != E; ++I) {
+ DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
+ if (calcUnreachableHeuristics(*I))
+ continue;
+ if (calcMetadataWeights(*I))
+ continue;
+ if (calcLoopBranchHeuristics(*I))
+ continue;
+ if (calcPointerHeuristics(*I))
+ continue;
+ if (calcZeroHeuristics(*I))
+ continue;
+ calcFloatingPointHeuristics(*I);
+ }
+
+ PostDominatedByUnreachable.clear();
+ return false;
+}
+
+void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "---- Branch Probabilities ----\n";
+ // We print the probabilities from the last function the analysis ran over,
+ // or the function it is currently running over.
+ assert(LastF && "Cannot print prior to running over a function");
+ for (Function::const_iterator BI = LastF->begin(), BE = LastF->end();
+ BI != BE; ++BI) {
+ for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI);
+ SI != SE; ++SI) {
+ printEdgeProbability(OS << " ", BI, *SI);
+ }
+ }
}
uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
@@ -434,12 +436,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- uint32_t Weight = getEdgeWeight(Src, Dst);
- uint32_t Sum = getSumForBlock(Src);
-
- // FIXME: Implement BranchProbability::compare then change this code to
- // compare this BranchProbability against a static "hot" BranchProbability.
- return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
@@ -461,8 +459,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
}
}
- // FIXME: Use BranchProbability::compare.
- if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+ // Hot probability is at least 4/5 = 80%
+ if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
return MaxSucc;
return 0;
@@ -483,8 +481,8 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
void BranchProbabilityInfo::
setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) {
Weights[std::make_pair(Src, Dst)] = Weight;
- DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
- << Dst->getNameStr() << " weight to " << Weight
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
+ << Dst->getName() << " weight to " << Weight
<< (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
}
@@ -499,11 +497,12 @@ getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
}
raw_ostream &
-BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
- BasicBlock *Dst) const {
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
+ const BasicBlock *Src,
+ const BasicBlock *Dst) const {
const BranchProbability Prob = getEdgeProbability(Src, Dst);
- OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+ OS << "edge " << Src->getName() << " -> " << Dst->getName()
<< " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
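
The heuristic weights above only matter through the ratio weight / sum-of-successor-weights that getEdgeProbability computes (getSumForBlock adds up the per-edge weights). A small editorial sketch of what the new unreachable heuristic means numerically for a two-way branch:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t UR_TAKEN_WEIGHT    = 1;             // edge into unreachable-terminated code
  const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; // the surviving edge
  uint64_t Sum = (uint64_t)UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT; // 1048576

  // The reachable edge gets ~0.999999 and passes the 4/5 "hot" test in
  // isEdgeHot; the unreachable edge gets ~9.5e-7.
  std::printf("P(unreachable) = %g\n", (double)UR_TAKEN_WEIGHT / Sum);
  std::printf("P(reachable)   = %g\n", (double)UR_NONTAKEN_WEIGHT / Sum);
  return 0;
}
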
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 7bb063fbbbcf..76854000bd23 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
std::string ErrorInfo;
@@ -111,7 +111,7 @@ namespace {
}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
std::string ErrorInfo;
@@ -143,7 +143,7 @@ INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
/// being a 'dot' and 'gv' program in your path.
///
void Function::viewCFG() const {
- ViewGraph(this, "cfg" + getNameStr());
+ ViewGraph(this, "cfg" + getName());
}
/// viewCFGOnly - This function is meant for use from the debugger. It works
@@ -152,7 +152,7 @@ void Function::viewCFG() const {
/// his can make the graph smaller.
///
void Function::viewCFGOnly() const {
- ViewGraph(this, "cfg" + getNameStr(), true);
+ ViewGraph(this, "cfg" + getName(), true);
}
FunctionPass *llvm::createCFGPrinterPass () {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e79459d7a409..2e3ec8bebc86 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis
BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
+ CodeMetrics.cpp
ConstantFolding.cpp
DIBuilder.cpp
DbgInfoPrinter.cpp
@@ -58,10 +59,4 @@ add_llvm_library(LLVMAnalysis
ValueTracking.cpp
)
-add_llvm_library_dependencies(LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(IPA)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index b2c27d1dfc4b..dd33eeb1b376 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -16,25 +16,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Value.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Analysis/CaptureTracking.h"
using namespace llvm;
-/// As its comment mentions, PointerMayBeCaptured can be expensive.
-/// However, it's not easy for BasicAA to cache the result, because
-/// it's an ImmutablePass. To work around this, bound queries at a
-/// fixed number of uses.
-///
-/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
-/// a cache. Then we can move the code from BasicAliasAnalysis into
-/// that path, and remove this threshold.
-static int const Threshold = 20;
+CaptureTracker::~CaptureTracker() {}
+
+namespace {
+ struct SimpleCaptureTracker : public CaptureTracker {
+ explicit SimpleCaptureTracker(bool ReturnCaptures)
+ : ReturnCaptures(ReturnCaptures), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) { return true; }
+
+ bool captured(Use *U) {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ bool ReturnCaptures;
+
+ bool Captured;
+ };
+}
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
@@ -45,6 +55,26 @@ static int const Threshold = 20;
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
bool ReturnCaptures, bool StoreCaptures) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ (void)StoreCaptures;
+
+ SimpleCaptureTracker SCT(ReturnCaptures);
+ PointerMayBeCaptured(V, &SCT);
+ return SCT.Captured;
+}
+
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
SmallVector<Use*, Threshold> Worklist;
SmallSet<Use*, Threshold> Visited;
@@ -55,9 +85,10 @@ bool llvm::PointerMayBeCaptured(const Value *V,
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
if (Count++ >= Threshold)
- return true;
+ return Tracker->tooManyUses();
Use *U = &UI.getUse();
+ if (!Tracker->shouldExplore(U)) continue;
Visited.insert(U);
Worklist.push_back(U);
}
@@ -86,11 +117,10 @@ bool llvm::PointerMayBeCaptured(const Value *V,
// (think of self-referential objects).
CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
for (CallSite::arg_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ if (A->get() == V && !CS.doesNotCapture(A - B))
// The parameter is not marked 'nocapture' - captured.
- return true;
- // Only passed via 'nocapture' arguments, or is the called function - not
- // captured.
+ if (Tracker->captured(U))
+ return;
break;
}
case Instruction::Load:
@@ -99,18 +129,11 @@ bool llvm::PointerMayBeCaptured(const Value *V,
case Instruction::VAArg:
// "va-arg" from a pointer does not cause it to be captured.
break;
- case Instruction::Ret:
- if (ReturnCaptures)
- return true;
- break;
case Instruction::Store:
if (V == I->getOperand(0))
// Stored the pointer - conservatively assume it may be captured.
- // TODO: If StoreCaptures is not true, we could do Fancy analysis
- // to determine whether this store is not actually an escape point.
- // In that case, BasicAliasAnalysis should be updated as well to
- // take advantage of this.
- return true;
+ if (Tracker->captured(U))
+ return;
// Storing to the pointee does not cause the pointer to be captured.
break;
case Instruction::BitCast:
@@ -122,7 +145,8 @@ bool llvm::PointerMayBeCaptured(const Value *V,
UI != UE; ++UI) {
Use *U = &UI.getUse();
if (Visited.insert(U))
- Worklist.push_back(U);
+ if (Tracker->shouldExplore(U))
+ Worklist.push_back(U);
}
break;
case Instruction::ICmp:
@@ -136,13 +160,16 @@ bool llvm::PointerMayBeCaptured(const Value *V,
break;
// Otherwise, be conservative. There are crazy ways to capture pointers
// using comparisons.
- return true;
+ if (Tracker->captured(U))
+ return;
+ break;
default:
// Something else - be conservative and say it is captured.
- return true;
+ if (Tracker->captured(U))
+ return;
+ break;
}
}
- // All uses examined - not captured.
- return false;
+ // All uses examined.
}
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 000000000000..316e7bc9349a
--- /dev/null
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,184 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
+ if (isa<PHINode>(I))
+ return true;
+
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ return GEP->hasAllConstantIndices();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't count as size.
+ return true;
+ }
+ }
+
+ if (const CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+ return true;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(CI) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+ return true;
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ return true;
+ }
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetData *TD) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isInstructionFree(II, TD))
+ continue;
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this indirect
+ // jump would jump from the inlined copy of the function into the original
+ // function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
+ // If this function contains a call that "returns twice" (e.g., setjmp or
+ // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
+ // This is a hack because we depend on the user marking their local variables
+ // as volatile if they are live across a setjmp call, and they probably
+ // won't do this in callers.
+ exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
+ !F->hasFnAttr(Attribute::ReturnsTwice);
+
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB, TD);
+}
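
A short usage sketch (editorial) of the new CodeMetrics helper; the fields read here are the ones filled in by analyzeBasicBlock/analyzeFunction above:

#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// Summarise a function's size the way the inline-cost analysis sees it.
// TD may be null; only the "free trunc" test depends on it.
static void printSizeSummary(Function &F, const TargetData *TD) {
  CodeMetrics Metrics;
  Metrics.analyzeFunction(&F, TD);

  errs() << F.getName() << ": " << Metrics.NumInsts << " insts, "
         << Metrics.NumBlocks << " blocks, " << Metrics.NumCalls << " calls\n";
  if (Metrics.isRecursive || Metrics.containsIndirectBr ||
      Metrics.usesDynamicAlloca || Metrics.exposesReturnsTwice)
    errs() << "  (likely a poor inlining candidate)\n";
}
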
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index df79849c3cf4..7a0a4e1e8246 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/Operator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -51,6 +52,42 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
return Constant::getAllOnesValue(DestTy);
+ // Handle a vector->integer cast.
+ if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
+ ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ if (CDV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned NumSrcElts = CDV->getType()->getNumElements();
+
+ Type *SrcEltTy = CDV->getType()->getElementType();
+
+ // If the vector is a vector of floating point, convert it to vector of int
+ // to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CDV = cast<ConstantDataVector>(C);
+ }
+
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ APInt Result(IT->getBitWidth(), 0);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Result <<= BitShift;
+ if (TD.isLittleEndian())
+ Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
+ else
+ Result |= CDV->getElementAsInteger(i);
+ }
+
+ return ConstantInt::get(IT, Result);
+ }
+
// The code below only handles casts to vectors currently.
VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
if (DestVTy == 0)
@@ -64,17 +101,16 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
// If this is a bitcast from constant vector -> vector, fold it.
- ConstantVector *CV = dyn_cast<ConstantVector>(C);
- if (CV == 0)
+ if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
return ConstantExpr::getBitCast(C, DestTy);
// If the element types match, VMCore can fold it.
unsigned NumDstElt = DestVTy->getNumElements();
- unsigned NumSrcElt = CV->getNumOperands();
+ unsigned NumSrcElt = C->getType()->getVectorNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
- Type *SrcEltTy = CV->getType()->getElementType();
+ Type *SrcEltTy = C->getType()->getVectorElementType();
Type *DstEltTy = DestVTy->getElementType();
// Otherwise, we're changing the number of elements in a vector, which
@@ -94,7 +130,6 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, TD);
- if (!C) return ConstantExpr::getBitCast(C, DestTy);
// Finally, VMCore can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
@@ -108,8 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
// Ask VMCore to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
- CV = dyn_cast<ConstantVector>(C);
- if (!CV) // If VMCore wasn't able to fold it, bail out.
+ // If VMCore wasn't able to fold it, bail out.
+ if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
+ !isa<ConstantDataVector>(C))
return C;
}
@@ -131,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
@@ -148,28 +184,29 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
Result.push_back(Elt);
}
- } else {
- // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
- unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+ return ConstantVector::get(Result);
+ }
+
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
- // Loop over each source value, expanding into multiple results.
- for (unsigned i = 0; i != NumSrcElt; ++i) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
- if (!Src) // Reject constantexpr elements.
- return ConstantExpr::getBitCast(C, DestTy);
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
- unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
- for (unsigned j = 0; j != Ratio; ++j) {
- // Shift the piece of the value into the right place, depending on
- // endianness.
- Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
- ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
-
- // Truncate and remember this piece.
- Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
- }
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
}
@@ -272,7 +309,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return false;
}
-
+
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
const StructLayout *SL = TD.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
@@ -310,12 +347,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// not reached.
}
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType());
+ if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
+ isa<ConstantDataSequential>(C)) {
+ Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
- for (; Index != CA->getType()->getNumElements(); ++Index) {
- if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr,
+ uint64_t NumElts;
+ if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = cast<VectorType>(C->getType())->getNumElements();
+
+ for (; Index != NumElts; ++Index) {
+ if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
BytesLeft, TD))
return false;
if (EltSize >= BytesLeft)
@@ -327,30 +372,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return true;
}
-
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
- uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType());
- uint64_t Index = ByteOffset / EltSize;
- uint64_t Offset = ByteOffset - Index * EltSize;
- for (; Index != CV->getType()->getNumElements(); ++Index) {
- if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr,
- BytesLeft, TD))
- return false;
- if (EltSize >= BytesLeft)
- return true;
- Offset = 0;
- BytesLeft -= EltSize;
- CurPtr += EltSize;
- }
- return true;
- }
-
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
- return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
- BytesLeft, TD);
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
}
// Otherwise, unknown initializer type.
@@ -445,9 +472,9 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
- std::string Str;
- if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
- unsigned StrLen = Str.length();
+ StringRef Str;
+ if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
// Replace load with immediate integer if the result is an integer or fp
@@ -542,8 +569,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// explicitly cast them so that they aren't implicitly cast by the
/// getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
if (!TD) return 0;
Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
@@ -568,7 +595,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Constant *C =
ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
return C;
}
@@ -576,10 +603,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
return 0;
Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
@@ -602,7 +630,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD);
+ Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
return Res;
}
}
@@ -729,7 +757,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = 0;
@@ -765,7 +795,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
+ TD, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
@@ -781,28 +811,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
SmallVector<Constant*, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
i != e; ++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
- NewC = ConstantFoldConstantExpression(NewCE, TD);
+ NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
+ TD, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -817,7 +848,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -830,11 +862,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
switch (Opcode) {
default: return 0;
case Instruction::ICmp:
- case Instruction::FCmp: assert(0 && "Invalid for compares");
+ case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
@@ -888,9 +920,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -903,7 +935,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
@@ -920,7 +953,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -929,7 +962,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
}
@@ -944,7 +977,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -953,7 +986,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD);
+ CE1->getOperand(0), TD, TLI);
}
}
@@ -962,13 +995,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+ TD, TLI);
Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+ TD, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
}
}
@@ -981,56 +1016,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
/// constant expression, or null if something is funny and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
- if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ if (!CE->getOperand(1)->isNullValue())
return 0; // Do not allow stepping over the value!
-
+
// Loop over all of the operands, tracking down which value we are
- // addressing...
- gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
- for (++I; I != E; ++I)
- if (StructType *STy = dyn_cast<StructType>(*I)) {
- ConstantInt *CU = cast<ConstantInt>(I.getOperand());
- assert(CU->getZExtValue() < STy->getNumElements() &&
- "Struct index out of range!");
- unsigned El = (unsigned)CU->getZExtValue();
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- C = CS->getOperand(El);
- } else if (isa<ConstantAggregateZero>(C)) {
- C = Constant::getNullValue(STy->getElementType(El));
- } else if (isa<UndefValue>(C)) {
- C = UndefValue::get(STy->getElementType(El));
- } else {
- return 0;
- }
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
- if (CI->getZExtValue() >= ATy->getNumElements())
- return 0;
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
- C = CA->getOperand(CI->getZExtValue());
- else if (isa<ConstantAggregateZero>(C))
- C = Constant::getNullValue(ATy->getElementType());
- else if (isa<UndefValue>(C))
- C = UndefValue::get(ATy->getElementType());
- else
- return 0;
- } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) {
- if (CI->getZExtValue() >= VTy->getNumElements())
- return 0;
- if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
- C = CP->getOperand(CI->getZExtValue());
- else if (isa<ConstantAggregateZero>(C))
- C = Constant::getNullValue(VTy->getElementType());
- else if (isa<UndefValue>(C))
- C = UndefValue::get(VTy->getElementType());
- else
- return 0;
- } else {
- return 0;
- }
- } else {
- return 0;
- }
+ // addressing.
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
+ C = C->getAggregateElement(CE->getOperand(i));
+ if (C == 0) return 0;
+ }
+ return C;
+}
+
+/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
+/// indices (with an *implied* zero pointer index that is not in the list),
+/// return the constant value being addressed by a virtual load, or null if
+/// something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
+ ArrayRef<Constant*> Indices) {
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ C = C->getAggregateElement(Indices[i]);
+ if (C == 0) return 0;
+ }
return C;
}
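
Both helpers above now index into aggregate initializers through Constant::getAggregateElement, which handles ConstantStruct, ConstantArray, ConstantVector and the ConstantData* classes uniformly. A hypothetical caller of the new index-list entry point might look like the sketch below; the helper name FoldGlobalLoad and the two-index shape are illustrative, not from the patch:

// Fold a load of GV's initializer through two constant GEP indices
// (the leading zero pointer index is implied and omitted from the list).
static Constant *FoldGlobalLoad(GlobalVariable *GV,
                                Constant *Idx0, Constant *Idx1) {
  if (!GV->hasDefinitiveInitializer())
    return 0;
  Constant *Indices[] = { Idx0, Idx1 };
  // Returns null if any step does not resolve to a constant element.
  return ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indices);
}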
@@ -1045,6 +1054,7 @@ bool
llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::sqrt:
+ case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
case Intrinsic::ctpop:
@@ -1115,7 +1125,6 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
- return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
@@ -1132,7 +1141,6 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
- return 0; // dummy return to suppress warning
}
/// ConstantFoldConvertToInt - Attempt an SSE floating point to integer
@@ -1143,11 +1151,8 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
/// available for the result. Returns null if the conversion cannot be
/// performed, otherwise returns the Constant value resulting from the
/// conversion.
-static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
- Type *Ty) {
- assert(Op && "Called with NULL operand");
- APFloat Val(Op->getValueAPF());
-
+static Constant *ConstantFoldConvertToInt(const APFloat &Val,
+ bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
assert(ResultWidth <= 64 &&
@@ -1168,7 +1173,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
if (!F->hasName()) return 0;
StringRef Name = F->getName();
@@ -1183,6 +1189,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
}
+ if (!TLI)
+ return 0;
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
@@ -1201,43 +1209,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
Op->getValueAPF().convertToDouble();
switch (Name[0]) {
case 'a':
- if (Name == "acos")
+ if (Name == "acos" && TLI->has(LibFunc::acos))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin")
+ else if (Name == "asin" && TLI->has(LibFunc::asin))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan")
+ else if (Name == "atan" && TLI->has(LibFunc::atan))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil")
+ if (Name == "ceil" && TLI->has(LibFunc::ceil))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos")
+ else if (Name == "cos" && TLI->has(LibFunc::cos))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh")
+ else if (Name == "cosh" && TLI->has(LibFunc::cosh))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf")
+ else if (Name == "cosf" && TLI->has(LibFunc::cosf))
return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp")
+ if (Name == "exp" && TLI->has(LibFunc::exp))
return ConstantFoldFP(exp, V, Ty);
- if (Name == "exp2") {
+ if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
}
break;
case 'f':
- if (Name == "fabs")
+ if (Name == "fabs" && TLI->has(LibFunc::fabs))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor")
+ else if (Name == "floor" && TLI->has(LibFunc::floor))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0)
+ if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0)
+ else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
return ConstantFoldFP(log10, V, Ty);
else if (F->getIntrinsicID() == Intrinsic::sqrt &&
(Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1248,21 +1256,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
break;
case 's':
- if (Name == "sin")
+ if (Name == "sin" && TLI->has(LibFunc::sin))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh")
+ else if (Name == "sinh" && TLI->has(LibFunc::sinh))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0)
+ else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0)
+ else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf")
+ else if (Name == "sinf" && TLI->has(LibFunc::sinf))
return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan")
+ if (Name == "tan" && TLI->has(LibFunc::tan))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh")
+ else if (Name == "tanh" && TLI->has(LibFunc::tanh))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1277,10 +1285,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::cttz:
- return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
case Intrinsic::convert_from_fp16: {
APFloat Val(Op->getValue());
@@ -1300,24 +1304,31 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
}
- if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+  // Support ConstantVector in case we have an Undef at the top.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ Constant *Op = cast<Constant>(Operands[0]);
switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
- if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
- return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty);
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
- if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
- return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty);
}
}
-
+
if (isa<UndefValue>(Operands[0])) {
if (F->getIntrinsicID() == Intrinsic::bswap)
return Operands[0];
@@ -1337,16 +1348,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
-
+
double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Name == "pow")
+ if (F->getIntrinsicID() == Intrinsic::pow) {
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod")
+ }
+ if (!TLI)
+ return 0;
+ if (Name == "pow" && TLI->has(LibFunc::pow))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod" && TLI->has(LibFunc::fmod))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2")
+ if (Name == "atan2" && TLI->has(LibFunc::atan2))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
@@ -1361,7 +1377,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return 0;
}
-
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
@@ -1375,7 +1390,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
APInt Res;
bool Overflow;
switch (F->getIntrinsicID()) {
- default: assert(0 && "Invalid case");
+ default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
break;
@@ -1401,6 +1416,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
};
return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
+ case Intrinsic::cttz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
}
}
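
The net effect of the ConstantFolding.cpp changes is that libm-style call folding is now gated on TargetLibraryInfo, and vector operands are read through getAggregateElement. A minimal caller sketch, assuming a pass that already has TLI available; the helper name tryFoldCall is illustrative, but the folding entry points match the signatures introduced above:

static Constant *tryFoldCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  Function *Callee = CI->getCalledFunction();
  if (!Callee || !canConstantFoldCallTo(Callee))
    return 0;
  SmallVector<Constant *, 4> Args;
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
    Constant *C = dyn_cast<Constant>(CI->getArgOperand(i));
    if (!C)
      return 0;                     // every operand must already be constant
    Args.push_back(C);
  }
  return ConstantFoldCall(Callee, Args, TLI); // null if TLI lacks the libcall
}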
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index bfa429d54120..85913b11bef4 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -17,6 +17,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
@@ -76,10 +77,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
StringRef Directory, StringRef Producer,
bool isOptimized, StringRef Flags,
unsigned RunTimeVer) {
- assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89
- && "Invalid Language tag");
- assert (!Filename.empty()
- && "Unable to create compile unit without filename");
+ assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
+ (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
+ "Invalid Language tag");
+ assert(!Filename.empty() &&
+ "Unable to create compile unit without filename");
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
Value *THElts[] = { TempEnumTypes };
@@ -189,7 +191,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
return DIType(MDNode::get(VMContext, Elts));
}
-/// createQaulifiedType - Create debugging information entry for a qualified
+/// createQualifiedType - Create debugging information entry for a qualified
/// type, e.g. 'const int'.
DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
@@ -358,13 +360,58 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
return DIType(MDNode::get(VMContext, Elts));
}
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, MDNode *PropertyNode) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ PropertyNode
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCProperty - Create debugging information entry for Objective-C
+/// property.
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes,
+ DIType Ty) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
+ Ty
+ };
+ return DIObjCProperty(MDNode::get(VMContext, Elts));
+}
+
/// createClassType - Create debugging information entry for a class.
DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
DIType DerivedFrom, DIArray Elements,
- MDNode *VTableHoder, MDNode *TemplateParams) {
+ MDNode *VTableHolder, MDNode *TemplateParams) {
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
@@ -379,7 +426,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DerivedFrom,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- VTableHoder,
+ VTableHolder,
TemplateParams
};
return DIType(MDNode::get(VMContext, Elts));
@@ -440,7 +487,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -465,7 +512,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -484,9 +531,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ParameterTypes,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -500,7 +547,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits,
uint64_t AlignInBits,
- DIArray Elements) {
+ DIArray Elements) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
@@ -512,7 +559,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -628,6 +675,31 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
return DIType(Node);
}
+/// createForwardDecl - Create a temporary forward-declared type that
+/// can be RAUW'd if the full type is seen.
+DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIFile F,
+ unsigned Line, unsigned RuntimeLang) {
+ // Create a temporary MDNode.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // TheCU
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+    // To ease the transition, include sizes etc. of 0.
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext),
+ DIDescriptor::FlagFwdDecl),
+ NULL,
+ DIArray(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
/// getOrCreateArray - Get a DIArray, create one if required.
DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
if (Elements.empty()) {
@@ -738,7 +810,7 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
Elts.push_back(MDString::get(VMContext, Name));
Elts.push_back(F);
Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
- (LineNo | (ArgNo << 24))));
+ (LineNo | (ArgNo << 24))));
Elts.push_back(Ty);
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
@@ -754,6 +826,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
DIFile File, unsigned LineNo,
DIType Ty,
bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine,
unsigned Flags, bool isOptimized,
Function *Fn,
MDNode *TParams,
@@ -777,13 +850,14 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
TParams,
Decl,
- THolder
+ THolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
};
MDNode *Node = MDNode::get(VMContext, Elts);
@@ -831,7 +905,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
Fn,
TParam,
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- THolder
+ THolder,
+    // FIXME: Do we want to use a different scope line?
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
MDNode *Node = MDNode::get(VMContext, Elts);
return DISubprogram(Node);
@@ -854,7 +930,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
/// createLexicalBlockFile - This creates a new MDNode that encapsulates
/// an existing scope with a new filename.
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File) {
+ DIFile File) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
Scope,
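
Beyond the NULL/zero operand cleanups, the DIBuilder changes add forward declarations, Objective-C property and ivar descriptors, and a scope line on subprograms. A hedged usage sketch of the new forward-declaration entry point, where DIB, File and the literal numbers are assumptions:

// Create a temporary DW_TAG_class_type node flagged FlagFwdDecl; it can be
// replaced (RAUW'd) later once the full definition is seen.
DIType FwdTy = DIB.createForwardDecl(dwarf::DW_TAG_class_type, "MyClass",
                                     File, /*Line=*/10, /*RuntimeLang=*/0);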
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 44457d3c3de9..f61a8f3a5eb9 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getZExtValue();
return 0;
@@ -289,6 +289,10 @@ bool DIDescriptor::isEnumerator() const {
return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
}
+/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
+bool DIDescriptor::isObjCProperty() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
+}
//===----------------------------------------------------------------------===//
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
@@ -373,6 +377,19 @@ bool DICompileUnit::Verify() const {
return true;
}
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+ if (!DbgNode)
+ return false;
+ unsigned Tag = getTag();
+ if (Tag != dwarf::DW_TAG_APPLE_property) return false;
+ DIType Ty = getType();
+ if (!Ty.Verify()) return false;
+
+ // Don't worry about the rest of the strings for now.
+ return true;
+}
+
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
if (!DbgNode)
@@ -482,6 +499,7 @@ bool DINameSpace::Verify() const {
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
unsigned Tag = getTag();
+
if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
@@ -490,7 +508,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
// approach.
if (!BaseType.isValid())
return getSizeInBits();
- if (BaseType.isDerivedType())
+    // If this is a derived type, go ahead and get the base type, unless it's
+    // a reference, in which case it's just the size of the field. Pointer
+    // types have no need of this since they're a different kind of
+    // qualification on the type.
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type)
+ return getSizeInBits();
+ else if (BaseType.isDerivedType())
return DIDerivedType(BaseType).getOriginalTypeSize();
else
return BaseType.getSizeInBits();
@@ -499,6 +523,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
return getSizeInBits();
}
+/// getObjCProperty - Return property node, if this ivar is associated with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+ if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+}
+
/// isInlinedFnArgument - Return true if this variable provides debugging
/// information for an inlined function arguments.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
@@ -565,8 +596,7 @@ StringRef DIScope::getFilename() const {
return DIType(DbgNode).getFilename();
if (isFile())
return DIFile(DbgNode).getFilename();
- assert(0 && "Invalid DIScope!");
- return StringRef();
+ llvm_unreachable("Invalid DIScope!");
}
StringRef DIScope::getDirectory() const {
@@ -586,8 +616,7 @@ StringRef DIScope::getDirectory() const {
return DIType(DbgNode).getDirectory();
if (isFile())
return DIFile(DbgNode).getDirectory();
- assert(0 && "Invalid DIScope!");
- return StringRef();
+ llvm_unreachable("Invalid DIScope!");
}
DIArray DICompileUnit::getEnumTypes() const {
@@ -632,6 +661,32 @@ DIArray DICompileUnit::getGlobalVariables() const {
}
//===----------------------------------------------------------------------===//
+// DIDescriptor: vtable anchors for all descriptors.
+//===----------------------------------------------------------------------===//
+
+void DIScope::anchor() { }
+
+void DICompileUnit::anchor() { }
+
+void DIFile::anchor() { }
+
+void DIType::anchor() { }
+
+void DIBasicType::anchor() { }
+
+void DIDerivedType::anchor() { }
+
+void DICompositeType::anchor() { }
+
+void DISubprogram::anchor() { }
+
+void DILexicalBlock::anchor() { }
+
+void DINameSpace::anchor() { }
+
+void DILexicalBlockFile::anchor() { }
+
+//===----------------------------------------------------------------------===//
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
@@ -679,8 +734,13 @@ void DIType::print(raw_ostream &OS) const {
if (isBasicType())
DIBasicType(DbgNode).print(OS);
- else if (isDerivedType())
- DIDerivedType(DbgNode).print(OS);
+ else if (isDerivedType()) {
+ DIDerivedType DTy = DIDerivedType(DbgNode);
+ DTy.print(OS);
+ DICompositeType CTy = getDICompositeType(DTy);
+ if (CTy.Verify())
+ CTy.print(OS);
+ }
else if (isCompositeType())
DICompositeType(DbgNode).print(OS);
else {
@@ -698,7 +758,9 @@ void DIBasicType::print(raw_ostream &OS) const {
/// print - Print derived type.
void DIDerivedType::print(raw_ostream &OS) const {
- OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
+ OS << "\n\t Derived From: ";
+ getTypeDerivedFrom().print(OS);
+ OS << "\n\t";
}
/// print - Print composite type.
@@ -725,6 +787,9 @@ void DISubprogram::print(raw_ostream &OS) const {
if (isDefinition())
OS << " [def] ";
+ if (getScopeLineNumber() != getLineNumber())
+ OS << " [Scope: " << getScopeLineNumber() << "] ";
+
OS << "\n";
}
@@ -927,9 +992,30 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(Module &M) {
- if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"))
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
- addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i)));
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ addCompileUnit(CU);
+ if (CU.getVersion() > LLVMDebugVersion10) {
+ DIArray GVs = CU.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ if (addGlobalVariable(DIG))
+ processType(DIG.getType());
+ }
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ processSubprogram(DISubprogram(SPs.getElement(i)));
+ DIArray EnumTypes = CU.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ processType(DIType(EnumTypes.getElement(i)));
+ DIArray RetainedTypes = CU.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ processType(DIType(RetainedTypes.getElement(i)));
+ return;
+ }
+ }
+ }
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
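
With debug metadata newer than version 10, processModule now harvests global variables, subprograms, enum types and retained types directly from the compile units recorded in !llvm.dbg.cu instead of scanning every instruction. A usage sketch, assuming the iterator names of the contemporaneous DebugInfoFinder API:

DebugInfoFinder Finder;
Finder.processModule(M);
for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
                               E = Finder.subprogram_end();
     I != E; ++I)
  dbgs() << DISubprogram(*I).getName() << '\n';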
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 6de4e1e1d7de..1604576ec4ae 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -35,6 +35,8 @@ namespace {
};
}
+void DominanceFrontier::anchor() { }
+
const DominanceFrontier::DomSetType &
DominanceFrontier::calculate(const DominatorTree &DT,
const DomTreeNode *Node) {
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index eae83fdc369c..8ffef29870ae 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMipa
GlobalsModRef.cpp
IPA.cpp
)
-
-add_llvm_library_dependencies(LLVMipa
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 2e79eab51ff7..0df3e8a38218 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -127,16 +127,9 @@ private:
}
}
- // Loop over all of the users of the function, looking for non-call uses.
- for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
- User *U = *I;
- if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
- || !CallSite(cast<Instruction>(U)).isCallee(I)) {
- // Not a call, or being used as a parameter rather than as the callee.
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
- break;
- }
- }
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
// If this function is not defined in this translation unit, it could call
// anything.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index b226d66cd78a..c1d8e3e65a62 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -21,6 +21,7 @@
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -467,6 +468,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
} else if (isMalloc(&cast<Instruction>(*II)) ||
isFreeCall(&cast<Instruction>(*II))) {
FunctionEffect |= ModRef;
+ } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
+ // The callgraph doesn't include intrinsic calls.
+ Function *Callee = Intrinsic->getCalledFunction();
+ ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
+ FunctionEffect |= (Behaviour & ModRef);
}
if ((FunctionEffect & Mod) == 0)
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt
new file mode 100644
index 000000000000..980e91809b55
--- /dev/null
+++ b/lib/Analysis/IPA/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IPA
+parent = Libraries
+library_name = ipa
+required_libraries = Analysis Core Support
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index d0ca8920ab9f..b80966b65a17 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -79,10 +79,44 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
return false;
}
-/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+ const LoopInfo *LI,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ Loop *NearestLoop = 0;
+ for (DomTreeNode *Rung = DT->getNode(BB);
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *DomBB = Rung->getBlock();
+ Loop *DomLoop = LI->getLoopFor(DomBB);
+ if (DomLoop && DomLoop->getHeader() == DomBB) {
+ // If the domtree walk reaches a loop with no preheader, return false.
+ if (!DomLoop->isLoopSimplifyForm())
+ return false;
+ // If we have already checked this loop nest, stop checking.
+ if (SimpleLoopNests.count(DomLoop))
+ break;
+ // If we have not already checked this loop nest, remember the loop
+ // header nearest to BB. The nearest loop may not contain BB.
+ if (!NearestLoop)
+ NearestLoop = DomLoop;
+ }
+ }
+ if (NearestLoop)
+ SimpleLoopNests.insert(NearestLoop);
+ return true;
+}
+
+/// AddUsersImpl - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersImpl(Instruction *I,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ // Add this IV user to the Processed set before returning false to ensure that
+ // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
@@ -93,9 +127,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
- if (!Processed.insert(I))
- return true; // Instruction already handled.
-
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
@@ -115,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (isa<PHINode>(User) && Processed.count(User))
continue;
+ // Only consider IVUsers that are dominated by simplified loop
+ // headers. Otherwise, SCEVExpander will crash.
+ BasicBlock *UseBB = User->getParent();
+ // A phi's use is live out of its predecessor block.
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ unsigned OperandNo = UI.getOperandNo();
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ UseBB = PHI->getIncomingBlock(ValNo);
+ }
+ if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+ return false;
+
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
@@ -124,12 +167,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User)) {
+ !AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+ } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -153,6 +196,15 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true;
}
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ // SCEVExpander can only handle users that are dominated by simplified loop
+ // entries. Keep track of all loops that are only dominated by other simple
+ // loops so we don't traverse the domtree for each user.
+ SmallPtrSet<Loop*,16> SimpleLoopNests;
+
+ return AddUsersImpl(I, SimpleLoopNests);
+}
+
IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
@@ -268,6 +320,7 @@ void IVStrideUse::transformToPostInc(const Loop *L) {
void IVStrideUse::deleted() {
// Remove this user from the list.
+ Parent->Processed.erase(this->getUser());
Parent->IVUses.erase(this);
// this now dangles!
}
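
One subtlety in the new dominance check above: a PHI operand is live on the edge from its incoming block, so that block, not the PHI's parent, is what must sit under simplified loop headers. A self-contained restatement of the rule; the helper name is illustrative:

// Return the block in which a use is effectively live: for PHI operands this
// is the corresponding incoming predecessor, otherwise the user's own block.
static BasicBlock *getEffectiveUseBlock(Use &U) {
  Instruction *UserInst = cast<Instruction>(U.getUser());
  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    unsigned ValNo = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
    return PN->getIncomingBlock(ValNo);
  }
  return UserInst->getParent();
}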
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index e12e322c2a99..3e3d2ab75380 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,645 +11,1012 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "inline-cost"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
-/// callIsSmall - If a call is likely to lower to a single target instruction,
-/// or is otherwise deemed small return true.
-/// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
- if (!F) return false;
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // TargetData if available, or null.
+ const TargetData *const TD;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+ const bool AlwaysInline;
+
+ bool IsRecursive;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+  // likely simplifications post-inlining. The most important aspect we track
+  // is CFG-altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+ : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+ AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+ }
- if (F->hasLocalLinkage()) return false;
+ bool analyzeCall(CallSite CS);
- if (!F->hasName()) return false;
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
- StringRef Name = F->getName();
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
- Name == "sin" || Name == "sinf" || Name == "sinl" ||
- Name == "cos" || Name == "cosf" || Name == "cosl" ||
- Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
- return true;
+ void dump();
+};
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" ||
- Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
- Name == "floor" || Name == "floorf" || Name == "ceil" ||
- Name == "round" || Name == "ffs" || Name == "ffsl" ||
- Name == "abs" || Name == "labs" || Name == "llabs")
- return true;
+} // namespace
- return false;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
}
-/// analyzeBasicBlock - Fill in the current structure with information gleaned
-/// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetData *TD) {
- ++NumBlocks;
- unsigned NumInstsBeforeThisBB = NumInsts;
- for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
- II != E; ++II) {
- if (isa<PHINode>(II)) continue; // PHI nodes don't count.
-
- // Special handling for calls.
- if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
- if (isa<DbgInfoIntrinsic>(II))
- continue; // Debug intrinsics don't count as size.
-
- ImmutableCallSite CS(cast<Instruction>(II));
-
- if (const Function *F = CS.getCalledFunction()) {
- // If a function is both internal and has a single use, then it is
- // extremely likely to get inlined in the future (it was probably
- // exposed by an interleaved devirtualization pass).
- if (F->hasInternalLinkage() && F->hasOneUse())
- ++NumInlineCandidates;
-
- // If this call is to function itself, then the function is recursive.
- // Inlining it into other functions is a bad idea, because this is
- // basically just a form of loop peeling, and our metrics aren't useful
- // for that case.
- if (F == BB->getParent())
- isRecursive = true;
- }
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
- if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
- // Each argument to a call takes on average one instruction to set up.
- NumInsts += CS.arg_size();
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
- // We don't want inline asm to count as a call - that would prevent loop
- // unrolling. The argument setup cost is still real, though.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- ++NumCalls;
- }
- }
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
+}
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (!AI->isStaticAlloca())
- this->usesDynamicAlloca = true;
- }
+/// \brief Disable SROA for the candidate marked by this cost iterator.
+///
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
- if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
- ++NumVectorInsts;
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
- if (const CastInst *CI = dyn_cast<CastInst>(II)) {
- // Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
- isa<PtrToIntInst>(CI))
- continue;
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (isa<TruncInst>(CI) && TD &&
- TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
- continue;
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- continue;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
- // If a GEP has all constant indices, it will probably be folded with
- // a load/store.
- if (GEPI->hasAllConstantIndices())
- continue;
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
}
- ++NumInsts;
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
+ return true;
+}
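// A minimal standalone sketch (not part of the patch) of the offset arithmetic
// above, assuming a hypothetical 64-bit target and the aggregate type
// { i32, [4 x i64] }. A struct index adds the field's StructLayout offset and
// an array index adds index * element size, so GEP indices (0, 1, 2) fold to
// 8 + 2 * 8 = 24 bytes.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FieldOffset[] = {0, 8}; // stand-in for StructLayout offsets
  const uint64_t ElementSize = 8;        // stand-in for getTypeAllocSize(i64)
  uint64_t Offset = 0;
  Offset += FieldOffset[1];              // struct index 1
  Offset += 2 * ElementSize;             // array index 2
  assert(Offset == 24);
  return 0;
}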
- if (isa<ReturnInst>(BB->getTerminator()))
- ++NumRets;
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this indirect
- // jump would jump from the inlined copy of the function into the original
- // function which is extremely undefined behavior.
- if (isa<IndirectBrInst>(BB->getTerminator()))
- containsIndirectBr = true;
+ // We will happily inline static alloca instructions or dynamic alloca
+ // instructions in always-inline situations.
+ if (AlwaysInline || I.isStaticAlloca())
+ return Base::visitAlloca(I);
- // Remember NumInsts for this BB.
- NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
}
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += NumBBInsts[TI.getSuccessor(I)];
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- } else {
- // Figure out if this instruction will be removed due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+  // though we don't want to propagate its bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
- if (AllOperandsConstant) {
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
+ // Phi nodes are always zero-cost.
+ return true;
+}
- // And any other instructions that use it which become constants
- // themselves.
- Reduction += CountCodeReductionForConstant(&Inst);
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
}
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+      // Also handle SROA candidates here; we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
}
}
- return Reduction;
-}
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
- if (!V->getType()->isPointerTy()) return 0; // Not a pointer
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *I = cast<Instruction>(*UI);
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- Reduction += InlineConstants::InstrCost;
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (GEP->hasAllConstantIndices())
- Reduction += CountCodeReductionForAlloca(GEP);
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Reduction += CountCodeReductionForAlloca(BCI);
- } else {
- // If there is some other strange instruction, we're not going to be able
- // to do much if we inline this.
- return 0;
- }
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
+ return true;
}
- return Reduction;
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
}
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
- // If this function contains a call to setjmp or _setjmp, never inline
- // it. This is a hack because we depend on the user marking their local
- // variables as volatile if they are live across a setjmp call, and they
- // probably won't do this in callers.
- if (F->callsFunctionThatReturnsTwice())
- callsSetJmp = true;
-
- // Look at the size of the callee.
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- analyzeBasicBlock(&*BB, TD);
-}
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
- const TargetData *TD) {
- Metrics.analyzeFunction(F, TD);
-
- // A function with exactly one return has it removed during the inlining
- // process (see InlineFunction), so don't count it.
- // FIXME: This knowledge should really be encoded outside of FunctionInfo.
- if (Metrics.NumRets==1)
- --Metrics.NumInsts;
-
- // Check out all of the arguments to the function, figuring out how much
- // code can be eliminated if one of the arguments is a constant.
- ArgumentWeights.reserve(F->arg_size());
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
- Metrics.CountCodeReductionForAlloca(I)));
+ // Track base/offsets through casts
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
}
-/// NeverInline - returns true if the function should never be inlined into
-/// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
- return (Metrics.callsSetJmp || Metrics.isRecursive ||
- Metrics.containsIndirectBr);
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be nuked, and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+  // preserved either cannot fire on an integer, or won't in and of themselves
+  // disable SROA (ext) without some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // A ptrtoint cast is free so long as the result is large enough to store the
+ // pointer, and a legal integer type.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize >= TD->getPointerSizeInBits();
}
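// A small sketch (assumed target properties, not from the patch) of the
// "free ptrtoint" test above: the cast costs nothing only when the result is
// a legal integer type at least as wide as a pointer.
#include <cassert>

static bool isFreePtrToInt(unsigned IntegerSize, unsigned PointerSize) {
  // Stand-in for TD->isLegalInteger() on a typical 64-bit target.
  bool Legal = IntegerSize == 8 || IntegerSize == 16 ||
               IntegerSize == 32 || IntegerSize == 64;
  return Legal && IntegerSize >= PointerSize;
}

int main() {
  assert(isFreePtrToInt(64, 64));  // i64 result can hold a 64-bit pointer: free
  assert(!isFreePtrToInt(32, 64)); // i32 result cannot: costs an instruction
  return 0;
}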
-// getSpecializationBonus - The heuristic used to determine the per-call
-// performance boost for using a specialization of Callee with argument
-// specializedArgNo replaced by a constant.
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- if (Callee->mayBeOverridden())
- return 0;
- int Bonus = 0;
- // If this function uses the coldcc calling convention, prefer not to
- // specialize it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus -= InlineConstants::ColdccPenalty;
-
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- unsigned ArgNo = 0;
- unsigned i = 0;
- for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
- I != E; ++I, ++ArgNo)
- if (ArgNo == SpecializedArgNos[i]) {
- ++i;
- Bonus += CountBonusForConstant(I);
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+  // Propagate constants through inttoptr.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- // Calls usually take a long time, so they make the specialization gain
- // smaller.
- Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+ // Track base/offset pairs when round-tripped through a pointer without
+ // modifications provided the integer is not too large.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
- return Bonus;
+ // An inttoptr cast is free so long as the input is a legal integer type
+ // which doesn't contain values outside the range of a pointer.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize <= TD->getPointerSizeInBits();
}
-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function. We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+  // Propagate constants through the cast.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- // This could just be NULL.
- if (!C) return 0;
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
- Function *F = dyn_cast<Function>(C);
- if (!F) return 0;
+ // No-op casts don't have any cost.
+ if (I.isLosslessCast())
+ return true;
- int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
- return (Bonus > 0) ? 0 : Bonus;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(I) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
+ return true;
+
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // no-ops on most sane targets.
+ if (isa<CmpInst>(I.getOperand(0)))
+ return true;
+
+ // Assume the rest of the casts require work.
+ return false;
}
-// CountBonusForConstant - Figure out an approximation for how much per-call
-// performance boost we can expect if the specified value is constant.
-int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
- unsigned Bonus = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (CI->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(CI), C);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (II->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(II), C);
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
+ Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
+ if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ Ops, TD)) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- // FIXME: Eliminating conditional branches and switches should
- // also yield a per-call performance boost.
- else {
-    // Figure out the bonuses that will accrue due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
- if (AllOperandsConstant)
- Bonus += CountBonusForConstant(&Inst);
+ return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
+ }
}
}
- return Bonus;
-}
+ // If the comparison is an equality comparison with null, we can simplify it
+ // for any alloca-derived argument.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+ if (isAllocaDerivedArg(I.getOperand(0))) {
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
-int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- // InlineCost - This value measures how good of an inline candidate this call
-  // site is to inline. A lower inline cost makes it more likely for the call to
- // be inlined. This value may go negative.
- //
- int InlineCost = 0;
-
- // Compute any size reductions we can expect due to arguments being passed into
- // the function.
- //
- unsigned ArgNo = 0;
- CallSite::arg_iterator I = CS.arg_begin();
- for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
- FI != FE; ++I, ++FI, ++ArgNo) {
-
- // If an alloca is passed in, inlining this function is likely to allow
- // significant future optimization possibilities (like scalar promotion, and
- // scalarization), so encourage the inlining of the function.
- //
- if (isa<AllocaInst>(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
- // If this is a constant being passed into the function, use the argument
- // weights calculated for the callee to determine how much will be folded
- // away with this information.
- else if (isa<Constant>(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ disableSROA(CostIt);
}
- // Each argument passed in has a cost at both the caller and the callee
- // sides. Measurements show that each argument costs about the same as an
- // instruction.
- InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+ return false;
+}
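// A small illustration (values invented) of the common-base fold above: once
// both operands are known to be "base + constant offset" with the same base,
// the pointer compare reduces to comparing the two offsets.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t LHSOffset = 8, RHSOffset = 16; // offsets from a shared base pointer
  bool ULT = LHSOffset < RHSOffset;       // icmp ult of the pointers folds to this
  assert(ULT);
  assert(LHSOffset != RHSOffset);         // icmp ne likewise folds to true
  return 0;
}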
- // Now that we have considered all of the factors that make the call site more
- // likely to be inlined, look at factors that make us not want to inline it.
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
- // Calls usually take a long time, so they make the inlining gain smaller.
- InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
- // Look at the size of the callee. Each instruction counts as 5.
- InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- return InlineCost;
-}
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
-int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- bool isDirectCall = CS.getCalledFunction() == Callee;
- Instruction *TheCall = CS.getInstruction();
- int Bonus = 0;
-
- // If there is only one call of the function, and it has internal linkage,
- // make it almost guaranteed to be inlined.
- //
- if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
- Bonus += InlineConstants::LastCallToStaticBonus;
-
- // If the instruction after the call, or if the normal destination of the
- // invoke is an unreachable instruction, the function is noreturn. As such,
- // there is little point in inlining this.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
- if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Bonus += InlineConstants::NoreturnPenalty;
- } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
- Bonus += InlineConstants::NoreturnPenalty;
-
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus += InlineConstants::ColdccPenalty;
-
- // Add to the inline quality for properties that make the call valuable to
- // inline. This includes factors that indicate that the result of inlining
- // the function will be optimizable. Currently this just looks at arguments
- // passed into the function.
- //
- CallSite::arg_iterator I = CS.arg_begin();
- for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
- FI != FE; ++I, ++FI)
- // Compute any constant bonus due to inlining we want to give here.
- if (isa<Constant>(I))
- Bonus += CountBonusForConstant(FI, cast<Constant>(I));
-
- return Bonus;
+ return false;
}
-// getInlineCost - The heuristic used to determine if we should inline the
-// function call or not.
-//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
-}
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- Function *Callee,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- Instruction *TheCall = CS.getInstruction();
- Function *Caller = TheCall->getParent()->getParent();
+ disableSROA(CostIt);
+ }
- // Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline or call sites
- // marked noinline.
- if (Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ return false;
+}
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+bool CallAnalyzer::visitStore(StoreInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
+ disableSROA(CostIt);
+ }
- // If we should never inline this, return a huge cost.
- if (CalleeFI->NeverInline())
- return InlineCost::getNever();
+ return false;
+}
- // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
- // could move this up and avoid computing the FunctionInfo for
- // things we are going to just return always inline for. This
- // requires handling setjmp somewhere else, however.
- if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
- return InlineCost::getAlways();
+bool CallAnalyzer::visitCallSite(CallSite CS) {
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ !F.hasFnAttr(Attribute::ReturnsTwice)) {
+ // This aborts the entire analysis.
+ ExposesReturnsTwice = true;
+ return false;
+ }
- if (CalleeFI->Metrics.usesDynamicAlloca) {
- // Get information about the caller.
- FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return Base::visitCallSite(CS);
+
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // SROA can usually chew through these intrinsics and they have no cost
+ // so don't pay the price of analyzing them in detail.
+ return true;
+ }
+ }
+
+ if (Function *F = CS.getCalledFunction()) {
+ if (F == CS.getInstruction()->getParent()->getParent()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursive = true;
+ return false;
+ }
- // If we haven't calculated this information yet, do so now.
- if (CallerFI.Metrics.NumBlocks == 0) {
- CallerFI.analyzeFunction(Caller, TD);
+ if (!callIsSmall(F)) {
+ // We account for the average 1 instruction per call argument setup
+ // here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
- // Recompute the CalleeFI pointer, getting Caller could have invalidated
- // it.
- CalleeFI = &CachedFunctionInfo[Callee];
+ // Everything other than inline ASM will also have a significant cost
+ // merely from making the call.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
}
- // Don't inline a callee with dynamic alloca into a caller without them.
- // Functions containing dynamic alloca's are inefficient in various ways;
- // don't create more inefficiency.
- if (!CallerFI.Metrics.usesDynamicAlloca)
- return InlineCost::getNever();
+ return Base::visitCallSite(CS);
}
- // InlineCost - This value measures how good of an inline candidate this call
-  // site is to inline. A lower inline cost makes it more likely for the call to
- // be inlined. This value may go negative due to the fact that bonuses
- // are negative numbers.
- //
- int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
- return llvm::InlineCost::get(InlineCost);
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
}
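// A worked example (threshold value assumed) of the devirtualization credit
// above: the nested CallAnalyzer run prices the discovered callee, and
// whatever head-room is left under IndirectCallThreshold is subtracted from
// this call site's cost, clamped so the bonus never goes negative.
#include <algorithm>
#include <cassert>

int main() {
  const int IndirectCallThreshold = 100; // assumed value, for illustration only
  int Cost = 0;
  int NestedCost = 30;                   // cost reported by the nested analysis
  Cost -= std::max(0, IndirectCallThreshold - NestedCost);
  assert(Cost == -70);                   // a 70-point credit toward inlining
  return 0;
}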
-// getSpecializationCost - The heuristic used to determine the code-size
-// impact of creating a specialized version of Callee with argument
-// SpecializedArgNo replaced by a constant.
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- // Don't specialize functions which can be redefined at link-time to mean
- // something else.
- if (Callee->mayBeOverridden())
- return llvm::InlineCost::getNever();
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible to inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+ I != E; ++I) {
+ ++NumInstructions;
+ if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+    // If visiting this instruction detected an uninlinable pattern, abort.
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ if (NumVectorInstructions > NumInstructions/2)
+ VectorBonus = FiftyPercentVectorBonus;
+ else if (NumVectorInstructions > NumInstructions/10)
+ VectorBonus = TenPercentVectorBonus;
+ else
+ VectorBonus = 0;
+
+    // Check if we've passed the threshold so we don't spin in huge basic
+ // blocks that will never inline.
+ if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ return false;
+ }
- int Cost = 0;
+ return true;
+}
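// A sketch (threshold value assumed) of the vector-bonus tiers applied above:
// more than 50% vector instructions earns a bonus equal to the threshold,
// more than 10% earns half of it, and anything less earns nothing.
#include <cassert>

static int vectorBonus(unsigned NumVector, unsigned NumInstructions,
                       int Threshold) {
  if (NumVector > NumInstructions / 2)
    return Threshold;       // FiftyPercentVectorBonus
  if (NumVector > NumInstructions / 10)
    return Threshold / 2;   // TenPercentVectorBonus
  return 0;
}

int main() {
  assert(vectorBonus(60, 100, 225) == 225); // vector-dense block: full bonus
  assert(vectorBonus(20, 100, 225) == 112); // moderately dense: half bonus
  assert(vectorBonus(5, 100, 225) == 0);    // scalar code: no bonus
  return 0;
}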
- // Look at the original size of the callee. Each instruction counts as 5.
- Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving just the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!TD || !V->getType()->isPointerTy())
+ return 0;
- // Offset that with the amount of code that can be constant-folded
- // away with the given arguments replaced by constants.
- for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
- ae = SpecializedArgNos.end(); an != ae; ++an)
- Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return 0;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
- return llvm::InlineCost::get(Cost);
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
-// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-// higher threshold to determine if the function call should be inlined.
-float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
-
- // Get information about the callee.
- FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI.Metrics.NumBlocks == 0)
- CalleeFI.analyzeFunction(Callee, TD);
-
- float Factor = 1.0f;
- // Single BB functions are often written to be inlined.
- if (CalleeFI.Metrics.NumBlocks == 1)
- Factor += 0.5f;
-
-  // Be more aggressive if the function contains a good chunk (if it makes up
- // at least 10% of the instructions) of vector instructions.
- if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
- Factor += 2.0f;
- else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
- Factor += 1.5f;
- return Factor;
-}
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
+
+  // Track whether the post-inlining function would have more than one basic
+  // block. A function with a single basic block is often intended to be
+  // inlined. Balloon the threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+ Threshold += SingleBBBonus;
+
+ // Unless we are always-inlining, perform some tweaks to the cost and
+ // threshold based on the direct callsite information.
+ if (!AlwaysInline) {
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = Threshold;
+ TenPercentVectorBonus = Threshold / 2;
+
+ // Subtract off one instruction per call argument as those will be free after
+ // inlining.
+ Cost -= CS.arg_size() * InlineConstants::InstrCost;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero cost.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Threshold = 1;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+ Threshold = 1;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
+ }
-/// growCachedCostInfo - update the cached cost info for Caller after Callee has
-/// been inlined.
-void
-InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
- CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+ if (F.empty())
+ return true;
- // For small functions we prefer to recalculate the cost for better accuracy.
- if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
- resetCachedCostInfo(Caller);
- return;
+ // Track whether we've seen a return instruction. The first return
+ // instruction is free, as at least one will usually disappear in inlining.
+ bool HasReturn = false;
+
+ // Populate our simplified values by mapping from function arguments to call
+ // arguments with known important simplifications.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+ FAI != FAE; ++FAI, ++CAI) {
+ assert(CAI != CS.arg_end());
+ if (Constant *C = dyn_cast<Constant>(CAI))
+ SimplifiedValues[FAI] = C;
+
+ Value *PtrArg = *CAI;
+ if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+ ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+ // We can SROA any pointer arguments derived from alloca instructions.
+ if (isa<AllocaInst>(PtrArg)) {
+ SROAArgValues[FAI] = PtrArg;
+ SROAArgCosts[PtrArg] = 0;
+ }
+ }
}
+ NumConstantArgs = SimplifiedValues.size();
+ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+ NumAllocaArgs = SROAArgValues.size();
+
+ // The worklist of live basic blocks in the callee *after* inlining. We avoid
+ // adding basic blocks of the callee which can be proven to be dead for this
+ // particular call site in order to get more accurate cost estimates. This
+ // requires a somewhat heavyweight iteration pattern: we need to walk the
+  // basic blocks in a breadth-first order as we insert live successors. To
+  // accomplish this we use a small-size-optimized SetVector, favoring small
+  // iteration counts because we exit as soon as we cross our threshold.
+ typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+ SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ BBSetVector BBWorklist;
+ BBWorklist.insert(&F.getEntryBlock());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when undercounting doesn't matter.
+ if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ break;
+
+ BasicBlock *BB = BBWorklist[Idx];
+ if (BB->empty())
+ continue;
+
+ // Handle the terminator cost here where we can track returns and other
+ // function-wide constructs.
+ TerminatorInst *TI = BB->getTerminator();
+
+    // We never want to inline functions that contain an indirectbr. Inlining
+    // them would be incorrect because all the blockaddresses (in static global
+    // initializers, for example) would still refer to the original function,
+    // so the indirect jump would jump from the inlined copy of the function
+    // into the original function, which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(TI))
+ return false;
+
+ if (!HasReturn && isa<ReturnInst>(TI))
+ HasReturn = true;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // Analyze the cost of this block. If we blow through the threshold, this
+    // returns false, and we can bail out.
+ if (!analyzeBlock(BB)) {
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+ break;
+ }
- // For large functions, we can save a lot of computation time by skipping
- // recalculations.
- if (CallerMetrics.NumCalls > 0)
- --CallerMetrics.NumCalls;
+    // Add in the live successors by first checking whether we have a terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
- if (Callee == 0) return;
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+    // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
- CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+ Threshold += VectorBonus;
- // If we don't have metrics for the callee, don't recalculate them just to
- // update an approximation in the caller. Instead, just recalculate the
- // caller info from scratch.
- if (CalleeMetrics.NumBlocks == 0) {
- resetCachedCostInfo(Caller);
- return;
- }
+ return AlwaysInline || Cost < Threshold;
+}
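// A worked example (numbers assumed) of the threshold bookkeeping above: the
// single-BB bonus of Threshold / 2 is added up front and taken back once a
// second live block appears, and the final verdict is simply
// Cost < Threshold + VectorBonus (unless always-inline forces the answer).
#include <cassert>

int main() {
  int Threshold = 225;               // assumed base threshold
  int SingleBBBonus = Threshold / 2; // 112
  Threshold += SingleBBBonus;        // 337 while the callee still looks single-BB
  // ... a conditional branch adds a second live block ...
  Threshold -= SingleBBBonus;        // back to 225
  int Cost = 180, VectorBonus = 0;
  assert(Cost < Threshold + VectorBonus); // analyzeCall would return true here
  return 0;
}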
- // Since CalleeMetrics were already calculated, we know that the CallerMetrics
- // reference isn't invalidated: both were in the DenseMap.
- CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
-
- // FIXME: If any of these three are true for the callee, the callee was
- // not inlined into the caller, so I think they're redundant here.
- CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
- CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
- CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
-
- CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
- CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
- CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
- CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
- CallerMetrics.NumRets += CalleeMetrics.NumRets;
-
- // analyzeBasicBlock counts each function argument as an inst.
- if (CallerMetrics.NumInsts >= Callee->arg_size())
- CallerMetrics.NumInsts -= Callee->arg_size();
- else
- CallerMetrics.NumInsts = 0;
-
- // We are not updating the argument weights. We have already determined that
- // Caller is a fairly large function, so we accept the loss of precision.
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
+ DEBUG_PRINT_STAT(NumConstantArgs);
+ DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+ DEBUG_PRINT_STAT(NumAllocaArgs);
+ DEBUG_PRINT_STAT(NumConstantPtrCmps);
+ DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+ DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(SROACostSavings);
+ DEBUG_PRINT_STAT(SROACostSavingsLost);
+#undef DEBUG_PRINT_STAT
}
-/// clear - empty the cache of inline costs
-void InlineCostAnalyzer::clear() {
- CachedFunctionInfo.clear();
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
+ int Threshold) {
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (!Callee || Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n");
+
+ CallAnalyzer CA(TD, *Callee, Threshold);
+ bool ShouldInline = CA.analyzeCall(CS);
+
+ DEBUG(CA.dump());
+
+ // Check if there was a reason to force inlining or no inlining.
+ if (!ShouldInline && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever();
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ return InlineCost::getAlways();
+
+ return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
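// A sketch (hypothetical helper, not in the patch) of how the boolean verdict
// from analyzeCall combines with the cost/threshold pair above: a refusal with
// a below-threshold cost means something vetoed inlining outright, and an
// acceptance with an at-or-above-threshold cost means it was forced (e.g. by
// always-inline); everything else is left to the caller's cost comparison.
#include <cassert>
#include <string>

static std::string verdict(bool ShouldInline, int Cost, int Threshold) {
  if (!ShouldInline && Cost < Threshold)
    return "never";                 // forcibly disabled despite a low cost
  if (ShouldInline && Cost >= Threshold)
    return "always";                // forcibly enabled despite a high cost
  return "compare cost to threshold";
}

int main() {
  assert(verdict(false, 50, 225) == "never");
  assert(verdict(true, 500, 225) == "always");
  assert(verdict(true, 100, 225) == "compare cost to threshold");
  return 0;
}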
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 131cc97d2379..16e7a726595d 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,13 +18,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instsimplify"
+#include "llvm/GlobalAlias.h"
#include "llvm/Operator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetData.h"
@@ -37,23 +41,28 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
-static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+struct Query {
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+ const DominatorTree *DT;
+
+ Query(const TargetData *td, const TargetLibraryInfo *tli,
+ const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {};
+};
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
/// getFalse - For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getNullValue(Ty);
}
@@ -61,13 +70,25 @@ static Constant *getFalse(Type *Ty) {
/// getTrue - For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getAllOnesValue(Ty);
}
+/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ CmpInst *Cmp = dyn_cast<CmpInst>(V);
+ if (!Cmp)
+ return false;
+ CmpInst::Predicate CPred = Cmp->getPredicate();
+ Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1);
+ if (CPred == Pred && CLHS == LHS && CRHS == RHS)
+ return true;
+ return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
+ CRHS == LHS;
+}
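// A minimal sketch (hypothetical types, not from the patch) of the swapped-
// predicate equivalence above: a query for "b > a" is answered by an existing
// compare "a < b" once the operands are swapped along with the predicate.
#include <cassert>

enum Pred { SLT, SGT };
static Pred swappedPred(Pred P) { return P == SLT ? SGT : SLT; }

static bool sameCompare(Pred HavePred, int HaveLHS, int HaveRHS,
                        Pred WantPred, int WantLHS, int WantRHS) {
  if (HavePred == WantPred && HaveLHS == WantLHS && HaveRHS == WantRHS)
    return true;
  return HavePred == swappedPred(WantPred) && HaveLHS == WantRHS &&
         HaveRHS == WantLHS;
}

int main() {
  // The existing compare is "1 slt 2"; the query asks about "2 sgt 1".
  assert(sameCompare(SLT, 1, 2, SGT, 2, 1));
  return 0;
}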
+
/// ValueDominatesPHI - Does the given value dominate the specified phi node?
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
@@ -75,9 +96,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// Arguments and constants dominate all instructions.
return true;
+ // If we are processing instructions (and/or basic blocks) that have not been
+ // fully added to a function, the parent nodes may still be null. Simply
+ // return the conservative answer in these cases.
+ if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ return false;
+
// If we have a DominatorTree then do a precise test.
- if (DT)
+ if (DT) {
+ if (!DT->isReachableFromEntry(P->getParent()))
+ return true;
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return false;
return DT->dominates(I, P);
+ }
// Otherwise, if the instruction is in the entry block, and is not an invoke,
// then it obviously dominates all phi nodes.
@@ -94,8 +126,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExpand, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExpand, const Query &Q,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -107,8 +139,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op C) op' (B op C)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -117,8 +149,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return LHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -131,8 +162,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op B) op' (A op C)".
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -141,8 +172,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return RHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -157,8 +187,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExtract, const Query &Q,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -182,7 +212,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *DD = A == C ? D : C;
// Form "A op' (B op DD)" if it simplifies completely.
// Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
// It does! Return "A op' V" if it simplifies or is already available.
// If V equals B then "A op' V" is just the LHS. If V equals DD then
// "A op' V" is just the RHS.
@@ -191,7 +221,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == B ? LHS : RHS;
}
// Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
++NumFactor;
return W;
}
@@ -205,7 +235,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *CC = B == D ? C : D;
// Form "(A op CC) op' B" if it simplifies completely..
// Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
// It does! Return "V op' B" if it simplifies or is already available.
// If V equals A then "V op' B" is just the LHS. If V equals CC then
// "V op' B" is just the RHS.
@@ -214,7 +244,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == A ? LHS : RHS;
}
// Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
++NumFactor;
return W;
}
@@ -227,9 +257,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
- const TargetData *TD,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
@@ -247,12 +275,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -266,12 +294,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -289,12 +317,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -308,12 +336,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -328,9 +356,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// evaluating it on both branches of the select results in the same value.
/// Returns the common value if so, otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -347,11 +373,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -402,8 +428,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// result in the same value. Returns the common value if so, otherwise returns
/// null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const TargetData *TD,
- const DominatorTree *DT,
+ Value *RHS, const Query &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -416,40 +441,67 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
}
assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
SelectInst *SI = cast<SelectInst>(LHS);
+ Value *Cond = SI->getCondition();
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
- MaxRecurse)) {
- // It does! Does "cmp FV, RHS" simplify?
- if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
- MaxRecurse)) {
- // It does! If they simplified to the same value, then use it as the
- // result of the original comparison.
- if (TCmp == FCmp)
- return TCmp;
- Value *Cond = SI->getCondition();
- // If the false value simplified to false, then the result of the compare
- // is equal to "Cond && TCmp". This also catches the case when the false
- // value simplified to false and the true value to true, returning "Cond".
- if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
- return V;
- // If the true value simplified to true, then the result of the compare
- // is equal to "Cond || FCmp".
- if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
- return V;
- // Finally, if the false value simplified to true and the true value to
- // false, then the result of the compare is equal to "!Cond".
- if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
- if (Value *V =
- SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- TD, DT, MaxRecurse))
- return V;
- }
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
+ if (TCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'true'.
+ TCmp = getTrue(Cond->getType());
+ } else if (!TCmp) {
+ // It didn't simplify. However, if "cmp TV, RHS" is equal to the select
+ // condition then we can replace it with 'true'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, TV, RHS))
+ return 0;
+ TCmp = getTrue(Cond->getType());
+ }
+
+ // Does "cmp FV, RHS" simplify?
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
+ if (FCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'false'.
+ FCmp = getFalse(Cond->getType());
+ } else if (!FCmp) {
+ // It didn't simplify. However, if "cmp FV, RHS" is equal to the select
+ // condition then we can replace it with 'false'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, FV, RHS))
+ return 0;
+ FCmp = getFalse(Cond->getType());
}
+ // If both sides simplified to the same value, then use it as the result of
+ // the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+
+ // The remaining cases only make sense if the select condition has the same
+ // type as the result of the comparison, so bail out if this is not so.
+ if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
+ return 0;
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ Q, MaxRecurse))
+ return V;
+
return 0;
}
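// Illustrative note (hypothetical IR, not taken from this patch): given
//   %c = icmp eq i32 %x, 0
//   %s = select i1 %c, i32 %x, i32 42
// simplifying "icmp eq i32 %s, 0" takes the isSameCompare path for the true
// arm (TCmp becomes true) and constant-folds the false arm to false; the
// "FCmp simplified to false" case above then reduces the whole compare to
// SimplifyAndInst(%c, true), i.e. to %c itself.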
@@ -458,8 +510,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// it on the incoming phi values yields the same result for every value. If so
/// returns the common value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -468,13 +519,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(LHS, PI, DT))
+ if (!ValueDominatesPHI(LHS, PI, Q.DT))
return 0;
}
@@ -485,8 +536,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
 // If the operation failed to simplify, or simplified to a different value
 // than it did previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -502,8 +553,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// incoming phi values yields the same result every time. If so returns the
/// common result, otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -517,7 +567,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
PHINode *PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
// Evaluate the BinOp on the incoming phi values.
@@ -526,7 +576,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
 // If the operation failed to simplify, or simplified to a different value
 // than it did previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -540,13 +590,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
- return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, TD);
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+ Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -576,17 +625,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -602,20 +651,116 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+/// \brief Accumulate the constant integer offset a GEP represents.
+///
+/// Given a getelementptr instruction/constantexpr, accumulate the constant
+/// offset from the base pointer into the provided APInt 'Offset'. Returns true
+/// if the GEP has all-constant indices. Returns false if any non-constant
+/// index is encountered, leaving the 'Offset' in an undefined state. The
+/// 'Offset' APInt must have the bit width of the target's pointer size.
+static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP,
+ APInt &Offset) {
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
+ if (!OpC) return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
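// Worked example (hypothetical IR; assumes a typical 64-bit layout where
// { i32, i64 } places the i64 at offset 8 and has an alloc size of 16):
//   getelementptr { i32, i64 }* %p, i64 1, i32 1
// contributes 1 * 16 for the leading index plus 8 for field 1, so Offset
// ends up as 24 and the function returns true (all indices are constant).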
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+static Constant *stripAndComputeConstantOffsets(const TargetData &TD,
+ Value *&V) {
+ if (!V->getType()->isPointerTy())
+ return 0;
+
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset))
+ break;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD.getIntPtrType(V->getContext());
+ return ConstantInt::get(IntPtrTy, Offset);
+}
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns null.
+static Constant *computePointerDifference(const TargetData &TD,
+ Value *LHS, Value *RHS) {
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ if (!LHSOffset)
+ return 0;
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+ if (!RHSOffset)
+ return 0;
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ // Otherwise, the difference of LHS - RHS can be computed as:
+ // LHS - RHS
+ // = (LHSOffset + Base) - (RHSOffset + Base)
+ // = LHSOffset - RHSOffset
+ return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
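// Illustrative use (hypothetical IR, not taken from this patch): with
//   %a = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 10
//   %b = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 3
// both pointers strip back to %buf with offsets 10 and 3, so the difference
// folds to the constant 7; SimplifySubInst below uses this to simplify
// "sub (ptrtoint %a), (ptrtoint %b)".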
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// X - undef -> undef
@@ -643,19 +788,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *Y = 0, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -667,19 +810,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -691,23 +832,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
+ // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+ if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+ match(Op1, m_Trunc(m_Value(Y))))
+ if (X->getType() == Y->getType())
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "trunc V" simplifies.
+ if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ // It does, return the simplified "trunc V".
+ return W;
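  // Illustrative note (hypothetical IR): two distinct "trunc i64 %x to i32"
  // instructions are different Values, so the generic X - X fold earlier in
  // SimplifySubInst does not fire on them directly; here "sub i64 %x, %x"
  // simplifies to 0, the trunc of 0 simplifies to 0, and the whole sub folds
  // to i32 0.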
+
+ // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+ if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+ match(Op1, m_PtrToInt(m_Value(Y))))
+ if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+ return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
// Mul distributes over Sub. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -723,19 +880,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -755,40 +914,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
return Op0;
// (X / Y) * Y -> X if the division is exact.
- Value *X = 0, *Y = 0;
- if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
- (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
- BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
- if (Div->isExact())
- return X;
- }
+ Value *X = 0;
+ if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ return X;
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -796,19 +952,19 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -842,7 +998,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
Value *X = 0, *Y = 0;
if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
- BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul knows it does not overflow, then we are good to go.
if ((isSigned && Mul->hasNoSignedWrap()) ||
(!isSigned && Mul->hasNoUnsignedWrap()))
@@ -861,13 +1017,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -875,36 +1031,38 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -917,19 +1075,19 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -964,13 +1122,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -978,36 +1136,38 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
+ unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1020,19 +1180,19 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -1057,13 +1217,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1072,9 +1232,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
return V;
// undef << X -> 0
@@ -1083,23 +1242,23 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X >> A) << A -> X
Value *X;
- if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
- cast<PossiblyExactOperator>(Op0)->isExact())
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
return 0;
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyLShrInst - Given operands for an LShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
return V;
// undef >>l X -> 0
@@ -1116,16 +1275,18 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
return V;
// all ones >>a X -> all ones
@@ -1146,19 +1307,22 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1197,37 +1361,46 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
(A == Op0 || B == Op0))
return Op0;
+ // A & (-A) = A if A is a power of two or zero.
+ if (match(Op0, m_Neg(m_Specific(Op1))) ||
+ match(Op1, m_Neg(m_Specific(Op0)))) {
+ if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
+ return Op0;
+ if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
+ return Op1;
+ }
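  // Illustrative note (not from this patch): for A = 16, -A is ...11110000
  // in two's complement, so A & -A == 16 == A; the identity also holds when
  // A is zero, which is why isPowerOfTwo is queried with OrZero set.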
+
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -1235,19 +1408,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1297,51 +1471,51 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+ MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1366,13 +1540,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1388,8 +1562,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1416,17 +1591,56 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
return 0;
}
+static Constant *computePointerICmp(const TargetData &TD,
+ CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ // We can only fold certain predicates on pointer comparisons.
+ switch (Pred) {
+ default:
+ return 0;
+
+ // Equality comparisons are easy to fold.
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ break;
+
+ // We can only handle unsigned relational comparisons because 'inbounds' on
+ // a GEP only protects against unsigned wrapping.
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ // However, we have to switch them to their signed variants to handle
+ // negative indices from the base pointer.
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ break;
+ }
+
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ if (!LHSOffset)
+ return 0;
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+ if (!RHSOffset)
+ return 0;
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+}
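// Worked example (hypothetical IR; assumes i32 occupies 4 bytes): with
//   %p = getelementptr inbounds i32* %base, i64 2   ; offset 8
//   %q = getelementptr inbounds i32* %base, i64 5   ; offset 20
// "icmp ult i32* %p, %q" strips both sides to %base, switches ult to its
// signed form slt, and folds to "icmp slt 8, 20", i.e. true.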
+
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -1443,8 +1657,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
// Special case logic when the operands have i1 type.
- if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
- cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
switch (Pred) {
default: break;
case ICmpInst::ICMP_EQ:
@@ -1480,63 +1693,101 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
- // different addresses, and what's more the address of a stack variable is
- // never null or equal to the address of a global. Note that generalizing
- // to the case where LHS is a global variable address or null is pointless,
- // since if both LHS and RHS are constants then we already constant folded
- // the compare, and if only one of them is then we moved it to RHS already.
- if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
- isa<ConstantPointerNull>(RHS)))
- // We already know that LHS != RHS.
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ // icmp <object*>, <object*/null> - Different identified objects have
+ // different addresses (unless null), and what's more the address of an
+ // identified local is never equal to another argument (again, barring null).
+ // Note that generalizing to the case where LHS is a global variable address
+ // or null is pointless, since if both LHS and RHS are constants then we
+ // already constant folded the compare, and if only one of them is then we
+ // moved it to RHS already.
+ Value *LHSPtr = LHS->stripPointerCasts();
+ Value *RHSPtr = RHS->stripPointerCasts();
+ if (LHSPtr == RHSPtr)
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Be more aggressive about stripping pointer adjustments when checking a
+ // comparison of an alloca address to another object. We can rip off all
+ // inbounds GEP operations, even if they are variable.
+ LHSPtr = LHSPtr->stripInBoundsOffsets();
+ if (llvm::isIdentifiedObject(LHSPtr)) {
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
+ if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
+ // If both sides are different identified objects, they aren't equal
+ // unless they're null.
+ if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+
+ // A local identified object (alloca or noalias call) can't equal any
+ // incoming argument, unless they're both null.
+ if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ }
+
+ // Assume that the constant null is on the right.
+ if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
+ } else if (isa<Argument>(LHSPtr)) {
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
+ // An alloca can't be equal to an argument.
+ if (isa<AllocaInst>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
+ }
// If we are comparing with zero then try hard since this is a common case.
if (match(RHS, m_Zero())) {
bool LHSKnownNonNegative, LHSKnownNegative;
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_ULT:
return getFalse(ITy);
case ICmpInst::ICMP_UGE:
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
}
@@ -1564,6 +1815,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// 'srem x, CI2' produces (-|CI2|, |CI2|).
Upper = CI2->getValue().abs();
Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
+ // 'udiv CI2, x' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
} else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
// 'udiv x, CI2' produces [0, UINT_MAX / CI2].
APInt NegOne = APInt::getAllOnesValue(Width);
@@ -1616,19 +1870,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
- TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
+ Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
}
@@ -1640,7 +1894,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), TD, DT,
+ SrcOp, RI->getOperand(0), Q,
MaxRecurse-1))
return V;
}
@@ -1656,15 +1910,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
// there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
// LHS <u RHS.
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
@@ -1701,7 +1954,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1715,16 +1968,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
- MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
// bits there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_EQ:
return ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_NE:
@@ -1751,7 +2002,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
@@ -1760,7 +2011,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
}
@@ -1794,14 +2045,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred,
Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ C == LHS ? D : C, Q, MaxRecurse-1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1810,7 +2061,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y = (A == C || A == D) ? B : A;
Value *Z = (C == A || C == B) ? D : C;
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
return V;
}
}
@@ -1822,7 +2073,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1832,7 +2083,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1849,7 +2100,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1859,7 +2110,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1870,6 +2121,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // x udiv y <=u x.
+ if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
+ // icmp pred (X /u Y), X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
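
(A quick standalone check, not LLVM code, of the single unsigned fact this new fold relies on:)

    #include <cassert>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 1; y < 256; ++y)
          assert(x / y <= x);   // X /u Y <=u X, so 'ugt' folds to false and 'ule' to true
      return 0;
    }
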
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
@@ -1884,7 +2144,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
case Instruction::Shl: {
@@ -1895,7 +2155,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
}
@@ -1949,7 +2209,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -1963,7 +2223,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2019,7 +2279,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2033,7 +2293,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2090,37 +2350,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
}
+ // Simplify comparisons of related pointers using a powerful, recursive
+  // GEP-walk when we have target data available.
+ if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy())
+ if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS))
+ return C;
+
+ if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
+ if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
+ if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
+ GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
+ (ICmpInst::isEquality(Pred) ||
+ (GLHS->isInBounds() && GRHS->isInBounds() &&
+ Pred == ICmpInst::getSignedPredicate(Pred)))) {
+ // The bases are equal and the indices are constant. Build a constant
+ // expression GEP with the same indices and a null base pointer to see
+ // what constant folding can make out of it.
+ Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
+ SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+
+ SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
+ }
+ }
+ }
+
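
(To make the intent of the block above concrete, here is a plain-C++ analogue, an illustrative sketch rather than the patch's code: when two addresses share a base and differ only by compile-time-constant indices, their ordering is decided by the constant byte offsets alone, so the base itself, even a null one, drops out.)

    #include <cassert>
    #include <cstddef>

    struct S { int a; int b[4]; };

    int main() {
      S s{};
      // Same base 's', all-constant indices: compare the byte offsets instead.
      bool direct  = &s.b[3] > &s.b[1];
      bool offsets = (offsetof(S, b) + 3 * sizeof(int)) >
                     (offsetof(S, b) + 1 * sizeof(int));
      assert(direct == offsets);
      return 0;
    }
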
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -2188,27 +2477,31 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
-Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
- const TargetData *TD, const DominatorTree *) {
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
// select true, X, Y -> X
// select false, X, Y -> Y
if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
@@ -2231,12 +2524,22 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
return 0;
}
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD, const DominatorTree *) {
+static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+ PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
+ // The GEP pointer operand is not a pointer, it's a vector of pointers.
+ if (!PtrTy)
+ return 0;
// getelementptr P -> P.
if (Ops.size() == 1)
@@ -2255,9 +2558,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
if (C->isZero())
return Ops[0];
// getelementptr P, N -> P if P points to a type of zero size.
- if (TD) {
+ if (Q.TD) {
Type *Ty = PtrTy->getElementType();
- if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
return Ops[0];
}
}
@@ -2270,12 +2573,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
}
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit);
+}
+
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
/// can fold the result. If not, this returns null.
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const TargetData *,
- const DominatorTree *) {
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs, const Query &Q,
+ unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -2300,8 +2608,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
return 0;
}
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = 0;
@@ -2329,67 +2646,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
- return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
return CommonValue;
}
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+ if (Constant *C = dyn_cast<Constant>(Op))
+ return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+
+ return 0;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit);
+}
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ Q, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+ Q.TLI);
}
// If the operation is associative, try some generic simplifications.
if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
- // If the operation is with the result of a select instruction, check whether
+ // If the operation is with the result of a select instruction check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
@@ -2397,119 +2724,136 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+static Value *SimplifyCallInst(CallInst *CI, const Query &) {
+ // call undef -> undef
+ if (isa<UndefValue>(CI->getCalledValue()))
+ return UndefValue::get(CI->getType());
+
+ return 0;
}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, TD);
+ Result = ConstantFoldInstruction(I, TD, TLI);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), TD, DT);
+ I->getOperand(2), TD, TLI, DT);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, TD, DT);
+ Result = SimplifyGEPInst(Ops, TD, TLI, DT);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), TD, DT);
+ IV->getIndices(), TD, TLI, DT);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
+ break;
+ case Instruction::Call:
+ Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT));
+ break;
+ case Instruction::Trunc:
+ Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
break;
}
@@ -2519,57 +2863,84 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
return Result == I ? UndefValue::get(I->getType()) : Result;
}
-/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
-/// delete the From instruction. In addition to a basic RAUW, this does a
-/// recursive simplification of the newly formed instructions. This catches
-/// things where one simplification exposes other opportunities. This only
-/// simplifies and deletes scalar operations, it does not change the CFG.
+/// \brief Implementation of recursive simplification through an instruction's
+/// uses.
///
-void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
- const TargetData *TD,
- const DominatorTree *DT) {
- assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
-
- // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
- // we can know if it gets deleted out from under us or replaced in a
- // recursive simplification.
- WeakVH FromHandle(From);
- WeakVH ToHandle(To);
-
- while (!From->use_empty()) {
- // Update the instruction to use the new value.
- Use &TheUse = From->use_begin().getUse();
- Instruction *User = cast<Instruction>(TheUse.getUser());
- TheUse = To;
-
- // Check to see if the instruction can be folded due to the operand
- // replacement. For example changing (or X, Y) into (or X, -1) can replace
- // the 'or' with -1.
- Value *SimplifiedVal;
- {
- // Sanity check to make sure 'User' doesn't dangle across
- // SimplifyInstruction.
- AssertingVH<> UserHandle(User);
-
- SimplifiedVal = SimplifyInstruction(User, TD, DT);
- if (SimplifiedVal == 0) continue;
- }
+/// This is the common implementation of the recursive simplification routines.
+/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
+/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
+/// instructions to process and attempt to simplify it using
+/// InstructionSimplify.
+///
+/// This routine returns 'true' only when *it* simplifies something. The passed
+/// in simplified value does not count toward this.
+static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ bool Simplified = false;
+ SmallSetVector<Instruction *, 8> Worklist;
+
+ // If we have an explicit value to collapse to, do that round of the
+ // simplification loop by hand initially.
+ if (SimpleV) {
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (*UI != I)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ } else {
+ Worklist.insert(I);
+ }
- // Recursively simplify this user to the new value.
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
- From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
- To = ToHandle;
+  // Note that we must test the size on each iteration; the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ I = Worklist[Idx];
+
+ // See if this instruction simplifies.
+ SimpleV = SimplifyInstruction(I, TD, TLI, DT);
+ if (!SimpleV)
+ continue;
- assert(ToHandle && "To value deleted by recursive simplification?");
+ Simplified = true;
- // If the recursive simplification ended up revisiting and deleting
- // 'From' then we're done.
- if (From == 0)
- return;
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of To on the recursive step in most cases.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
}
+ return Simplified;
+}
- // If 'From' has value handles referring to it, do a real RAUW to update them.
- From->replaceAllUsesWith(To);
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT);
+}
- From->eraseFromParent();
+bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
+ assert(SimpleV && "Must provide a simplified value.");
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT);
}
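
(The new driver above is essentially a seen-set worklist. Below is a self-contained toy version, my sketch rather than the committed code, with invented names like Node and Rule; it shows the same grow-while-iterating pattern, where the vector is indexed rather than iterated because it can grow during the walk:)

    #include <cassert>
    #include <cstddef>
    #include <set>
    #include <vector>

    // Toy "instruction": a value plus the list of nodes that use it.
    struct Node {
      int value;
      std::vector<Node *> users;
    };

    // Collapse I to SimpleV, then keep simplifying its users with Rule until
    // the worklist stops growing.
    static bool replaceAndSimplify(Node &I, int SimpleV, bool (*Rule)(Node &)) {
      bool Simplified = false;
      std::vector<Node *> Worklist(I.users.begin(), I.users.end());
      std::set<Node *> Seen(Worklist.begin(), Worklist.end());
      I.value = SimpleV;                       // the forced initial replacement
      for (std::size_t Idx = 0; Idx != Worklist.size(); ++Idx) {
        Node *N = Worklist[Idx];
        if (!Rule(*N))
          continue;                            // this user did not simplify
        Simplified = true;
        for (Node *U : N->users)               // its users may now simplify too
          if (Seen.insert(U).second)
            Worklist.push_back(U);
      }
      return Simplified;
    }

    int main() {
      Node C{0, {}}, B{0, {&C}}, A{0, {&B}};
      // Rule: a node "simplifies" when its value is still zero; flip it to one.
      bool Changed = replaceAndSimplify(A, 1, [](Node &N) {
        if (N.value != 0) return false;
        N.value = 1;
        return true;
      });
      assert(Changed && B.value == 1 && C.value == 1);
      return 0;
    }
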
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
new file mode 100644
index 000000000000..a8a8079d1e5a
--- /dev/null
+++ b/lib/Analysis/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/Analysis/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = IPA
+
+[component_0]
+type = Library
+name = Analysis
+parent = Libraries
+required_libraries = Core Support Target
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f80595c7dbed..5ca2746c9f6a 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -20,20 +20,25 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include <map>
#include <stack>
using namespace llvm;
+using namespace PatternMatch;
char LazyValueInfo::ID = 0;
-INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
namespace llvm {
@@ -61,10 +66,10 @@ class LVILatticeVal {
constant,
/// notconstant - This Value is known to not have the specified value.
notconstant,
-
+
/// constantrange - The Value falls within this range.
constantrange,
-
+
/// overdefined - This value is not known to be constant, and we know that
/// it has a value.
overdefined
@@ -207,7 +212,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getConstant(),
@@ -233,7 +238,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getNotConstant(),
@@ -305,50 +310,6 @@ namespace {
};
}
-namespace llvm {
- template<>
- struct DenseMapInfo<LVIValueHandle> {
- typedef DenseMapInfo<Value*> PointerInfo;
- static inline LVIValueHandle getEmptyKey() {
- return LVIValueHandle(PointerInfo::getEmptyKey(),
- static_cast<LazyValueInfoCache*>(0));
- }
- static inline LVIValueHandle getTombstoneKey() {
- return LVIValueHandle(PointerInfo::getTombstoneKey(),
- static_cast<LazyValueInfoCache*>(0));
- }
- static unsigned getHashValue(const LVIValueHandle &Val) {
- return PointerInfo::getHashValue(Val);
- }
- static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
- return LHS == RHS;
- }
- };
-
- template<>
- struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
- typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
- typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
- typedef DenseMapInfo<Value*> BPointerInfo;
- static inline PairTy getEmptyKey() {
- return std::make_pair(APointerInfo::getEmptyKey(),
- BPointerInfo::getEmptyKey());
- }
- static inline PairTy getTombstoneKey() {
- return std::make_pair(APointerInfo::getTombstoneKey(),
- BPointerInfo::getTombstoneKey());
- }
- static unsigned getHashValue( const PairTy &Val) {
- return APointerInfo::getHashValue(Val.first) ^
- BPointerInfo::getHashValue(Val.second);
- }
- static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
- return APointerInfo::isEqual(LHS.first, RHS.first) &&
- BPointerInfo::isEqual(LHS.second, RHS.second);
- }
- };
-}
-
namespace {
/// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
@@ -360,14 +321,18 @@ namespace {
/// ValueCache - This is all of the cached information for all values,
/// mapped from Value* to key information.
- DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
/// OverDefinedCache - This tracks, on a per-block basis, the set of
/// values that are over-defined at the end of that block. This is required
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
-
+
+ /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
/// BlockValueStack - This stack holds the state of the value solver
/// during a query. It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -438,6 +403,7 @@ namespace {
/// clear - Empty the cache.
void clear() {
+ SeenBlocks.clear();
ValueCache.clear();
OverDefinedCache.clear();
}
@@ -466,6 +432,12 @@ void LVIValueHandle::deleted() {
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
SmallVector<OverDefinedPairTy, 4> ToErase;
for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
E = OverDefinedCache.end(); I != E; ++I) {
@@ -477,7 +449,7 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
E = ToErase.end(); I != E; ++I)
OverDefinedCache.erase(*I);
- for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+ for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
I->second.erase(BB);
}
@@ -505,6 +477,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
+ SeenBlocks.insert(BB);
return lookup(Val)[BB];
}
@@ -513,6 +486,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
ValueCacheEntryTy &Cache = lookup(Val);
+ SeenBlocks.insert(BB);
LVILatticeVal &BBLV = Cache[BB];
// OverDefinedCacheUpdater is a helper object that will update
@@ -823,9 +797,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
- if (ICI && ICI->getOperand(0) == Val &&
- isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality()) {
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
@@ -835,12 +808,23 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
return true;
}
- if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = 0;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
// Calculate the range of values that would satisfy the comparison.
ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
ConstantRange TrueValues =
ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
// If we're interested in the false dest, invert the condition.
if (!isTrueDest) TrueValues = TrueValues.inverse();
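
(The idiom decoded above is worth stating plainly: in unsigned arithmetic, (X - C1) u< C2 holds exactly when X lies in [C1, C1+C2), with wrapping handled by ConstantRange::subtract. A small standalone check over an 8-bit domain, constants chosen so the interval does not wrap, illustrative only:)

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 40, C2 = 25;                // C1 + C2 stays below 256
      for (unsigned X = 0; X < 256; ++X) {
        bool Idiom   = static_cast<uint8_t>(static_cast<uint8_t>(X) - C1) < C2;
        bool InRange = X >= C1 && X < unsigned(C1) + C2;
        assert(Idiom == InRange);                    // (X - C1) u< C2  <=>  X in [C1, C1+C2)
      }
      return 0;
    }
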
@@ -882,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// BBFrom to BBTo.
unsigned NumEdges = 0;
ConstantInt *EdgeVal = 0;
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
- if (SI->getSuccessor(i) != BBTo) continue;
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ if (i.getCaseSuccessor() != BBTo) continue;
if (NumEdges++) break;
- EdgeVal = SI->getCaseValue(i);
+ EdgeVal = i.getCaseValue();
}
assert(EdgeVal && "Missing successor?");
if (NumEdges == 1) {
@@ -1007,12 +992,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
bool LazyValueInfo::runOnFunction(Function &F) {
if (PImpl)
getCache(PImpl).clear();
-
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Fully lazy.
return false;
}
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -1061,7 +1053,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
// If we know the value is a constant, evaluate the conditional.
Constant *Res = 0;
if (Result.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+ TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? False : True;
return Unknown;
@@ -1102,13 +1095,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return True;
}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 38d677d502a7..83bdf5286ad7 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -44,6 +44,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/IntrinsicInst.h"
@@ -103,6 +104,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
std::string Messages;
raw_string_ostream MessagesStr;
@@ -117,6 +119,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTree>();
}
virtual void print(raw_ostream &O, const Module *M) const {}
@@ -149,6 +152,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -411,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I,
if (Align != 0) {
unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
- APInt Mask = APInt::getAllOnesValue(BitWidth),
- KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD);
Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
"Undefined behavior: Memory reference address is misaligned", &I);
}
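
(For context on the rewritten check, a sketch only and not part of the patch: the lint test flags an address as misaligned when any of its low log2(Align) bits is known to be one, which in plain arithmetic is:)

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned Align = 8;                 // a power of two
      const uint64_t LowBits = Align - 1;       // mask of the low log2(Align) bits
      uint64_t Addr = 0x1004;                   // bit 2 is set
      // If any low bit is provably one, the address cannot be Align-aligned;
      // this mirrors the KnownOne test in the Assert1 above.
      assert((Addr & LowBits) != 0);
      assert(Addr % Align != 0);
      return 0;
    }
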
@@ -471,9 +475,8 @@ static bool isZero(Value *V, TargetData *TD) {
if (isa<UndefValue>(V)) return true;
unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt Mask = APInt::getAllOnesValue(BitWidth),
- KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
return KnownZero.isAllOnesValue();
}
@@ -614,10 +617,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0e6bcbfae4f6..873a27543dd6 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -17,6 +17,7 @@
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
using namespace llvm;
@@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
/// it is set to 0, it will scan the whole block. You can also optionally
/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there. If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA,
+ MDNode **TBAATag) {
if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
// If we're using alias analysis to disambiguate get the size of *Ptr.
@@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
return LI;
+ }
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
return SI->getOperand(0);
+ }
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index 3997ac478b52..463269d9d984 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -91,8 +91,6 @@ static Value *GetPointerOperand(Value *I) {
if (StoreInst *i = dyn_cast<StoreInst>(I))
return i->getPointerOperand();
llvm_unreachable("Value is no load or store instruction!");
- // Never reached.
- return 0;
}
static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 85aaccaefc37..f7a60a1737d4 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
if (I->mayReadFromMemory())
return false;
@@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getTripCount - Return a loop-invariant LLVM value indicating the number of
-/// times the loop will be executed. Note that this means that the backedge
-/// of the loop executes N-1 times. If the trip-count cannot be determined,
-/// this returns null.
-///
-/// The IndVarSimplify pass transforms loops to have a form that this
-/// function easily understands.
-///
-Value *Loop::getTripCount() const {
- // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
- // canonical induction variable and V is the trip count of the loop.
- PHINode *IV = getCanonicalInductionVariable();
- if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
-
- bool P0InLoop = contains(IV->getIncomingBlock(0));
- Value *Inc = IV->getIncomingValue(!P0InLoop);
- BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
- if (BI->isConditional()) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getOperand(0) == Inc) {
- if (BI->getSuccessor(0) == getHeader()) {
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ICI->getOperand(1);
- } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- return ICI->getOperand(1);
- }
- }
- }
- }
-
- return 0;
-}
-
-/// getSmallConstantTripCount - Returns the trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the trip count is very large
-/// (>= 2^32)
-unsigned Loop::getSmallConstantTripCount() const {
- Value* TripCount = this->getTripCount();
- if (TripCount) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
- // Guard against huge trip counts.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- return (unsigned)TripCountC->getZExtValue();
- }
- }
- }
- return 0;
-}
-
-/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
-/// trip count of this loop as a normal unsigned value, if possible. This
-/// means that the actual trip count is always a multiple of the returned
-/// value (don't forget the trip count could very well be zero as well!).
-///
-/// Returns 1 if the trip count is unknown or not guaranteed to be the
-/// multiple of a constant (which is also the case if the trip count is simply
-/// constant, use getSmallConstantTripCount for that case), Will also return 1
-/// if the trip count is very large (>= 2^32).
-unsigned Loop::getSmallConstantTripMultiple() const {
- Value* TripCount = this->getTripCount();
- // This will hold the ConstantInt result, if any
- ConstantInt *Result = NULL;
- if (TripCount) {
- // See if the trip count is constant itself
- Result = dyn_cast<ConstantInt>(TripCount);
- // if not, see if it is a multiplication
- if (!Result)
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- Result = dyn_cast<ConstantInt>(BO->getOperand(1));
- break;
- case BinaryOperator::Shl:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- if (CI->getValue().getActiveBits() <= 5)
- return 1u << CI->getZExtValue();
- break;
- default:
- break;
- }
- }
- }
- // Guard against huge trip counts.
- if (Result && Result->getValue().getActiveBits() <= 32) {
- return (unsigned)Result->getZExtValue();
- } else {
- return 1;
- }
-}
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
// Sort the blocks vector so that we can use binary search to do quick
@@ -297,6 +205,17 @@ bool Loop::isLoopSimplifyForm() const {
return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
}
+/// isSafeToClone - Return true if the loop body is safe to clone in practice.
+/// Routines that reform the loop CFG and split edges often fail on indirectbr.
+bool Loop::isSafeToClone() const {
+ // Return false if any loop blocks contain indirectbrs.
+ for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
+ return false;
+ }
+ return true;
+}
+
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
@@ -477,21 +396,19 @@ void UnloopUpdater::updateBlockParents() {
/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
/// their new parents.
void UnloopUpdater::removeBlocksFromAncestors() {
- // Remove unloop's blocks from all ancestors below their new parents.
+ // Remove all unloop's blocks (including those in nested subloops) from
+ // ancestors below the new parent loop.
for (Loop::block_iterator BI = Unloop->block_begin(),
BE = Unloop->block_end(); BI != BE; ++BI) {
- Loop *NewParent = LI->getLoopFor(*BI);
- // If this block is an immediate subloop, remove all blocks (including
- // nested subloops) from ancestors below the new parent loop.
- // Otherwise, if this block is in a nested subloop, skip it.
- if (SubloopParents.count(NewParent))
- NewParent = SubloopParents[NewParent];
- else if (Unloop->contains(NewParent))
- continue;
-
+ Loop *OuterParent = LI->getLoopFor(*BI);
+ if (Unloop->contains(OuterParent)) {
+ while (OuterParent->getParentLoop() != Unloop)
+ OuterParent = OuterParent->getParentLoop();
+ OuterParent = SubloopParents[OuterParent];
+ }
// Remove blocks from former Ancestors except Unloop itself which will be
// deleted.
- for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent;
+ for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent;
OldParent = OldParent->getParentLoop()) {
assert(OldParent && "new loop is not an ancestor of the original");
OldParent->removeBlockFromLoop(*BI);
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 5ba1f4045d1a..aba700ac5c34 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -14,10 +14,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/DebugInfoProbe.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Timer.h"
using namespace llvm;
@@ -54,20 +52,6 @@ char PrintLoopPass::ID = 0;
}
//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-static DebugInfoProbeInfo *TheDebugProbe;
-static void createDebugInfoProbe() {
- if (TheDebugProbe) return;
-
- // Constructed the first time this is called. This guarantees that the
- // object will be constructed, if -enable-debug-info-probe is set,
- // before static globals, thus it will be destroyed before them.
- static ManagedStatic<DebugInfoProbeInfo> DIP;
- TheDebugProbe = &*DIP;
-}
-
-//===----------------------------------------------------------------------===//
// LPPassManager
//
@@ -195,7 +179,6 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
bool LPPassManager::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
bool Changed = false;
- createDebugInfoProbe();
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
@@ -227,21 +210,19 @@ bool LPPassManager::runOnFunction(Function &F) {
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
+
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
dumpRequiredSet(P);
initializeAnalysisImpl(P);
- if (TheDebugProbe)
- TheDebugProbe->initialize(P, F);
+
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
Changed |= P->runOnLoop(CurrentLoop, *this);
}
- if (TheDebugProbe)
- TheDebugProbe->finalize(P, F);
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index fde07ea4f98d..22414b36d5a4 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
AliasAnalysis::Location Loc = AA.getLocation(LI);
MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!LI->isUnordered()) {
+ if (!SI->isUnordered()) {
// FIXME: Handle atomic/volatile stores.
Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
static_cast<BasicBlock *>(0)));
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 8d451c46f9b0..b145650b0f0a 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -48,10 +48,10 @@ static bool isMallocCall(const CallInst *CI) {
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
FunctionType *FTy = Callee->getFunctionType();
- if (FTy->getNumParams() != 1)
- return false;
- return FTy->getParamType(0)->isIntegerTy(32) ||
- FTy->getParamType(0)->isIntegerTy(64);
+ return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) &&
+ FTy->getNumParams() == 1 &&
+ (FTy->getParamType(0)->isIntegerTy(32) ||
+ FTy->getParamType(0)->isIntegerTy(64));
}
/// extractMallocCall - Returns the corresponding CallInst if the instruction
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 92967c08dc21..3a544f35d502 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
@@ -321,14 +323,100 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
!TD.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
+ if (LIOffs+NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+ }
+
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
-
- return 0;
+}
+
+namespace {
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ struct CapturesBefore : public CaptureTracker {
+ CapturesBefore(const Instruction *I, DominatorTree *DT)
+ : BeforeHere(I), DT(DT), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ return true;
+ }
+
+ bool captured(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool Captured;
+ };
+}
+
+AliasAnalysis::ModRefResult
+MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst,
+ const AliasAnalysis::Location &MemLoc) {
+ AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ if (MR != AliasAnalysis::ModRef) return MR;
+
+ // FIXME: this is really just shoring-up a deficiency in alias analysis.
+ // BasicAA isn't willing to spend linear time determining whether an alloca
+ // was captured before or after this particular call, while we are. However,
+ // with a smarter AA in place, this test is just wasting compile time.
+ if (!DT) return AliasAnalysis::ModRef;
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
+ if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object))
+ return AliasAnalysis::ModRef;
+ ImmutableCallSite CS(Inst);
+ if (!CS.getInstruction()) return AliasAnalysis::ModRef;
+
+ CapturesBefore CB(Inst, DT);
+ llvm::PointerMayBeCaptured(Object, &CB);
+
+ if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured)
+ return AliasAnalysis::ModRef;
+
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!AA->isNoAlias(AliasAnalysis::Location(*CI),
+ AliasAnalysis::Location(Object))) {
+ return AliasAnalysis::ModRef;
+ }
+ }
+ return AliasAnalysis::NoModRef;
}
/// getPointerDependencyFrom - Return the instruction on which a memory
@@ -478,7 +566,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- switch (AA->getModRefInfo(Inst, MemLoc)) {
+ switch (getModRefInfo(Inst, MemLoc)) {
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
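The CapturesBefore tracker and the getModRefInfo wrapper above pair the CaptureTracker callback interface with the dominator tree: shouldExplore and captured reject any use at or after the query instruction, so PointerMayBeCaptured effectively answers whether the object is captured before that point. A minimal usage sketch, assuming the declarations from the hunk above are visible in the same translation unit (the helper name escapesBefore is illustrative, not part of the patch):

    static bool escapesBefore(const Value *Obj, const Instruction *Before,
                              DominatorTree *DT) {
      CapturesBefore Tracker(Before, DT);      // hooks skip uses dominated by Before
      llvm::PointerMayBeCaptured(Obj, &Tracker);
      return Tracker.Captured;                 // a use not dominated by Before captured Obj
    }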
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 7e22ddc61c09..38cb1c91f8f8 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
@@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) {
return true;
if (isa<CastInst>(Inst) &&
- Inst->isSafeToSpeculativelyExecute())
+ isSafeToSpeculativelyExecute(Inst))
return true;
if (Inst->getOpcode() == Instruction::Add &&
@@ -73,7 +74,6 @@ static bool VerifySubExpr(Value *Expr,
errs() << *I << '\n';
llvm_unreachable("Either something is missing from InstInputs or "
"CanPHITrans is wrong.");
- return false;
}
// Validate the operands of the instruction.
@@ -100,7 +100,6 @@ bool PHITransAddr::Verify() const {
for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
llvm_unreachable("This is unexpected.");
- return false;
}
// a-ok.
@@ -186,7 +185,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == Cast->getOperand(0))
@@ -228,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -284,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -381,7 +380,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index 0e3b6e69ce34..80c5222a27a5 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -386,8 +386,8 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
}
TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
- || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator))
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
+ isa<ResumeInst>(terminator))
addEdge(currentNode, getExit(),0);
currentNode->setColor(BallLarusNode::GRAY);
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 0ae734e259db..0fcdfe75aefd 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -137,22 +137,22 @@ bool PathProfileVerifier::runOnModule (Module &M) {
BasicBlock* source = nextEdge->getSource();
BasicBlock* target = nextEdge->getTarget();
unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
- << "}--> " << target->getNameStr());
+ DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
+ << "}--> " << target->getName());
// Ensure all the referenced edges exist
// TODO: make this a separate function
if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getNameStr() << "()]: source '"
- << source->getNameStr()
+ errs() << " error [" << F->getName() << "()]: source '"
+ << source->getName()
<< "' does not exist in the array map.\n";
} else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getNameStr() << "()]: target '"
- << target->getNameStr()
+ errs() << " error [" << F->getName() << "()]: target '"
+ << target->getName()
<< "' does not exist in the array map.\n";
} else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getNameStr() << "()]: edge "
- << source->getNameStr() << " -> " << target->getNameStr()
+ errs() << " error [" << F->getName() << "()]: edge "
+ << source->getName() << " -> " << target->getName()
<< " duplicate number " << duplicateNumber
<< " does not exist in the array map.\n";
} else {
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index b594e2ba5506..63468f842612 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -332,7 +332,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
// Clear Minimal Edges.
MinimalWeight.clear();
- DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+ DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
// Since the entry block is the first one and has no predecessors, the edge
// (0,entry) is inserted with the starting weight of 1.
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 098079bcffc4..c4da8079a511 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -160,7 +160,7 @@ bool LoaderPass::runOnModule(Module &M) {
ReadCount = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
readEdge(getEdge(0,&F->getEntryBlock()), Counters);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
@@ -181,7 +181,7 @@ bool LoaderPass::runOnModule(Module &M) {
ReadCount = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
readEdge(getEdge(0,&F->getEntryBlock()), Counters);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index a01751849c51..0cb158865afe 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -30,7 +30,7 @@ static cl::opt<bool,false>
ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
-namespace llvm {
+namespace {
template<class FType, class BType>
class ProfileVerifierPassT : public FunctionPass {
@@ -125,8 +125,8 @@ namespace llvm {
outCount++;
}
}
- dbgs() << "Block " << BB->getNameStr() << " in "
- << BB->getParent()->getNameStr() << ":"
+ dbgs() << "Block " << BB->getName() << " in "
+ << BB->getParent()->getName() << ":"
<< "BBWeight=" << format("%20.20g",BBWeight) << ","
<< "inWeight=" << format("%20.20g",inWeight) << ","
<< "inCount=" << inCount << ","
@@ -143,8 +143,8 @@ namespace llvm {
template<class FType, class BType>
void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
- << DI->BB->getParent()->getNameStr() << ":"
+ dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
+ << DI->BB->getParent()->getName() << ":"
<< "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
<< "inWeight=" << format("%20.20g",DI->inWeight) << ","
<< "inCount=" << DI->inCount << ","
@@ -201,13 +201,13 @@ namespace llvm {
double EdgeWeight = PI->getEdgeWeight(E);
if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
ASSERTMESSAGE("Edge has missing value");
return 0;
} else {
if (EdgeWeight < 0) {
dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
ASSERTMESSAGE("Edge has negative value");
}
return EdgeWeight;
@@ -220,8 +220,8 @@ namespace llvm {
DetailedBlockInfo *DI) {
if (Error) {
DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getNameStr() << " in Function "
- << DI->BB->getParent()->getNameStr() << ": ";
+ dbgs() << "Block " << DI->BB->getName() << " in Function "
+ << DI->BB->getParent()->getName() << ": ";
ASSERTMESSAGE(Message);
}
return;
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 52753cbe85af..b507b1e340f5 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -186,18 +186,16 @@ std::string Region::getNameStr() const {
raw_string_ostream OS(entryName);
WriteAsOperand(OS, getEntry(), false);
- entryName = OS.str();
} else
- entryName = getEntry()->getNameStr();
+ entryName = getEntry()->getName();
if (getExit()) {
if (getExit()->getName().empty()) {
raw_string_ostream OS(exitName);
WriteAsOperand(OS, getExit(), false);
- exitName = OS.str();
} else
- exitName = getExit()->getNameStr();
+ exitName = getExit()->getName();
} else
exitName = "<Function Return>";
@@ -652,7 +650,7 @@ void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
// This basic block is a start block of a region. It is already in the
// BBtoRegion relation. Only the child basic blocks have to be updated.
if (it != BBtoRegion.end()) {
- Region *newRegion = it->second;;
+ Region *newRegion = it->second;
region->addSubRegion(getTopMostParent(newRegion));
region = newRegion;
} else {
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e0ac56c65e76..1d55642079a0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,6 +74,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const {
OS << OpStr;
}
OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
return;
}
case scUDivExpr: {
@@ -249,11 +259,9 @@ Type *SCEV::getType() const {
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return 0;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
bool SCEV::isZero() const {
@@ -274,6 +282,20 @@ bool SCEV::isAllOnesValue() const {
return false;
}
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
@@ -587,11 +609,8 @@ namespace {
}
default:
- break;
+ llvm_unreachable("Unknown SCEV kind!");
}
-
- llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
};
}
@@ -2581,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2590,7 +2609,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2607,7 +2626,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2617,7 +2636,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -3108,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3168,7 +3187,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Add the total offset from all the GEP indices to the base.
return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ isInBounds ? SCEV::FlagNUW : SCEV::FlagAnyWrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3242,9 +3261,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones);
return Zeros.countTrailingOnes();
}
@@ -3382,9 +3400,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -3584,6 +3601,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
@@ -3598,16 +3621,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
- if (OBO->hasNoSignedWrap())
- setFlags(Flags, SCEV::FlagNSW);
- if (OBO->hasNoUnsignedWrap())
- setFlags(Flags, SCEV::FlagNUW);
- return getAddExpr(AddOps, Flags);
+ return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // See the Add code above.
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
@@ -3641,9 +3658,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned BitWidth = A.getBitWidth();
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
@@ -3915,13 +3931,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
//
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the maximum trip count is very large
-/// (>= 2^32)
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
- BasicBlock *ExitBlock) {
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number of times that the loop header executes because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::
+getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
const SCEVConstant *ExitCount =
- dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
if (!ExitCount)
return 0;
@@ -3944,9 +3966,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
/// multiple of a constant (which is also the case if the trip count is simply
/// constant, use getSmallConstantTripCount for that case), Will also return 1
/// if the trip count is very large (>= 2^32).
-unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
- BasicBlock *ExitBlock) {
- const SCEV *ExitCount = getExitCount(L, ExitBlock);
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned ScalarEvolution::
+getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -4153,13 +4178,19 @@ void ScalarEvolution::forgetValue(Value *V) {
}
/// getExact - Get the exact loop backedge taken count considering all loop
-/// exits. If all exits are computable, this is the minimum computed count.
+/// exits. A computable result can only be returned for loops with a single exit.
+/// Returning the minimum taken count among all exits is incorrect because one
+/// of the loop's exit limits may have been skipped. HowFarToZero assumes that
+/// the limit of each loop test is never skipped. This is a valid assumption as
+/// long as the loop exits via that test. For precise results, it is the
+/// caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
// If any exits were not computable, the loop is not computable.
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
- // We need at least one computable exit.
+ // We need exactly one computable exit.
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
@@ -4171,8 +4202,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
if (!BECount)
BECount = ENT->ExactNotTaken;
- else
- BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken);
+ else if (BECount != ENT->ExactNotTaken)
+ return SE->getCouldNotCompute();
}
assert(BECount && "Invalid not taken count for loop exit");
return BECount;
@@ -4253,8 +4284,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
if (MaxBECount == getCouldNotCompute())
MaxBECount = EL.Max;
- else if (EL.Max != getCouldNotCompute())
- MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max);
+ else if (EL.Max != getCouldNotCompute()) {
+ // We cannot take the "min" MaxBECount, because non-unit stride loops may
+ // skip some loop tests. Taking the max over the exits is sufficiently
+ // conservative. TODO: We could do better taking into consideration
+ // that (1) the loop has unit stride (2) the last loop test is
+ // less-than/greater-than (3) any loop test is less-than/greater-than AND
+ // falls through some constant number of times less than the other tests.
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+ }
}
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
@@ -4539,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// GetAddressedElementFromGlobal - Given a global variable with an initializer
-/// and a GEP expression (missing the pointer index) indexing into it, return
-/// the addressed element of the initializer or null if the index expression is
-/// invalid.
-static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
- const std::vector<ConstantInt*> &Indices) {
- Constant *Init = GV->getInitializer();
- for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
- uint64_t Idx = Indices[i]->getZExtValue();
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- assert(Idx < CS->getNumOperands() && "Bad struct index!");
- Init = cast<Constant>(CS->getOperand(Idx));
- } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- if (Idx >= CA->getNumOperands()) return 0; // Bogus program
- Init = cast<Constant>(CA->getOperand(Idx));
- } else if (isa<ConstantAggregateZero>(Init)) {
- if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
- assert(Idx < STy->getNumElements() && "Bad struct index!");
- Init = Constant::getNullValue(STy->getElementType(Idx));
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
- if (Idx >= ATy->getNumElements()) return 0; // Bogus program
- Init = Constant::getNullValue(ATy->getElementType());
- } else {
- llvm_unreachable("Unknown constant aggregate type!");
- }
- return 0;
- } else {
- return 0; // Unknown initializer type
- }
- }
- return Init;
-}
-
/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
@@ -4600,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
// Okay, we allow one non-constant index into the GEP instruction.
Value *VarIdx = 0;
- std::vector<ConstantInt*> Indexes;
+ std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
@@ -4612,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
Indexes.push_back(0);
}
+ // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+ if (!VarIdx)
+ return getCouldNotCompute();
+
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
@@ -4634,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+ Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
@@ -4658,7 +4667,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
/// specified type, assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
- isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<LoadInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -4748,16 +4758,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0;
- Instruction *I = cast<Instruction>(V);
if (Constant *C = Vals.lookup(I)) return C;
- assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant");
- assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop");
- (void)L;
+ // An instruction inside the loop depends on a value outside the loop that we
+ // weren't given a mapping for, or a value such as a call inside the loop.
+ if (!canConstantEvolve(I, L)) return 0;
+
+ // An unmapped PHI can be due to a branch or another loop inside this loop,
+ // or due to this not being the initial iteration through a loop where we
+ // couldn't compute the evolution of this particular PHI last time.
+ if (isa<PHINode>(I)) return 0;
std::vector<Constant*> Operands(I->getNumOperands());
@@ -4768,16 +4785,21 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (!Operands[i]) return 0;
continue;
}
- Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
Vals[Operand] = C;
if (!C) return 0;
Operands[i] = C;
}
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], TD);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD);
+ Operands[1], TD, TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ }
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+ TLI);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -4798,23 +4820,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
- // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis.
DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
// Since the loop is canonicalized, the PHI node must have two entries. One
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0)
- return RetVal = 0; // Must be a constant.
- CurrentIterVals[PN] = StartCST;
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return RetVal = 0;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return RetVal = 0; // Not derived from same PHI.
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
@@ -4826,15 +4851,46 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
- // Compute the value of the PHI node for the next iteration.
+ // Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
- if (NextPHI == CurrentIterVals[PN])
- return RetVal = NextPHI; // Stopped evolving!
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+ TLI);
if (NextPHI == 0)
return 0; // Couldn't evaluate!
- DenseMap<Instruction *, Constant *> NextIterVals;
NextIterVals[PN] = NextPHI;
+
+ bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+ // Also evaluate the other PHI nodes. However, we don't get to stop if we
+ // cease to be able to evaluate one of them or if they stop evolving,
+ // because that doesn't necessarily prevent us from computing PN.
+ SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ }
+ // We use two distinct loops because EvaluateExpression may invalidate any
+ // iterators into CurrentIterVals.
+ for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
+ I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = I->first;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (!NextPHI) { // Not already computed.
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ if (NextPHI != I->second)
+ StoppedEvolving = false;
+ }
+
+ // If all entries in CurrentIterVals == NextIterVals then we can stop
+ // iterating; the loop can't continue to change.
+ if (StoppedEvolving)
+ return RetVal = CurrentIterVals[PN];
+
CurrentIterVals.swap(NextIterVals);
}
}
@@ -4844,9 +4900,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
/// try to evaluate a few iterations of the loop until we get the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
- Value *Cond,
- bool ExitWhen) {
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (PN == 0) return getCouldNotCompute();
@@ -4854,29 +4910,33 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
// One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
-
- Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return getCouldNotCompute(); // Not derived from same PHI.
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return getCouldNotCompute();
// Okay, we find a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
- unsigned IterationNum = 0;
+
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
- for (Constant *PHIVal = StartCST;
- IterationNum != MaxIterations; ++IterationNum) {
- DenseMap<Instruction *, Constant *> PHIValMap;
- PHIValMap[PN] = PHIVal;
+ for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ TD, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -4886,11 +4946,29 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
- // Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD);
- if (NextPHI == 0 || NextPHI == PHIVal)
- return getCouldNotCompute();// Couldn't evaluate or not making progress...
- PHIVal = NextPHI;
+ // Update all the PHI nodes for the next iteration.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+
+ // Create a list of which PHIs we need to compute. We want to do this before
+ // calling EvaluateExpression on them because that may invalidate iterators
+ // into CurrentIterVals.
+ SmallVector<PHINode *, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(PHI);
+ }
+ for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
+ E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = *I;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (NextPHI) continue; // Already computed!
+
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
@@ -4921,6 +4999,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
return C;
}
+/// This builds up a Constant using the ConstantExpr interface. That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+ switch (V->getSCEVType()) {
+ default: // TODO: smax, umax.
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ break;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ break;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ break;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ break;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (C->getType()->isPointerTy())
+ C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2) return 0;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ std::swap(C, C2);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ }
+
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return 0;
+
+ if (C->getType()->isPointerTy()) {
+ if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy()) return 0;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy()) return 0;
+ C = ConstantExpr::getMul(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ break;
+ }
+ }
+ return 0;
+}
+
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
@@ -4973,11 +5143,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
- Constant *C = 0;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
- C = SC->getValue();
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
- C = dyn_cast<Constant>(SU->getValue());
+ Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
@@ -4992,10 +5158,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
- else
+ Operands[0], Operands[1], TD,
+ TLI);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ } else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, TD);
+ Operands, TD, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -5113,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
}
llvm_unreachable("Unknown SCEV type!");
- return 0;
}
/// getSCEVAtScope - This is a convenience function which does
@@ -5350,10 +5519,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// behavior. Loops must exhibit defined behavior until a wrapped value is
// actually used. So the trip count computed by udiv could be smaller than the
// number of well-defined iterations.
- if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
// FIXME: We really want an "isexact" bit for udiv.
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-
+ }
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -5744,7 +5913,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
switch (Pred) {
default:
llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- break;
case ICmpInst::ICMP_SGT:
Pred = ICmpInst::ICMP_SLT;
std::swap(LHS, RHS);
@@ -6089,8 +6257,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
return getCouldNotCompute();
// Check to see if we have a flag which makes analysis easy.
- bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
- AddRec->getNoWrapFlags(SCEV::FlagNUW);
+ bool NoWrap = isSigned ?
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
if (AddRec->isAffine()) {
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
@@ -6381,6 +6550,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -6417,6 +6587,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -6592,11 +6763,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return LoopVariant;
- default: break;
+ default: llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return LoopVariant;
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6678,11 +6846,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return DoesNotDominateBlock;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return DoesNotDominateBlock;
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -6728,11 +6894,9 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return false;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return false;
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
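isNonConstantNegative moves from the expander (deleted below) onto SCEV itself (added above). A minimal sketch of the shape it matches; SE, V and Ty are assumed to be an existing ScalarEvolution instance, a non-constant Value, and its integer type:

    // (-42 * %v): a SCEVMulExpr whose leading operand is a negative constant.
    const SCEV *NegStep =
        SE.getMulExpr(SE.getConstant(Ty, -42, /*isSigned=*/true),
                      SE.getUnknown(V));
    assert(NegStep->isNonConstantNegative() && "matches (-C * X) for positive C");
    // The expander uses this below to emit a sub instead of a negate-and-add.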
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 47f0f321161b..69507beeaae9 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -30,6 +31,19 @@ using namespace llvm;
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know that the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Instruction *Ret = NULL;
+
// Check to see if there is already a cast!
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI) {
@@ -37,27 +51,35 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
if (U->getType() == Ty)
if (CastInst *CI = dyn_cast<CastInst>(U))
if (CI->getOpcode() == Op) {
- // If the cast isn't where we want it, fix it.
- if (BasicBlock::iterator(CI) != IP) {
+ // If the cast isn't where we want it, create a new cast at IP.
+ // Likewise, do not reuse a cast at BIP because it must dominate
+ // instructions that might be inserted before BIP.
+ if (BasicBlock::iterator(CI) != IP || BIP == IP) {
// Create a new cast, and leave the old cast in place in case
// it is being used as an insert point. Clear its operand
// so that it doesn't hold anything live.
- Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
- NewCI->takeName(CI);
- CI->replaceAllUsesWith(NewCI);
+ Ret = CastInst::Create(Op, V, Ty, "", IP);
+ Ret->takeName(CI);
+ CI->replaceAllUsesWith(Ret);
CI->setOperand(0, UndefValue::get(V->getType()));
- rememberInstruction(NewCI);
- return NewCI;
+ break;
}
- rememberInstruction(CI);
- return CI;
+ Ret = CI;
+ break;
}
}
// Create a new cast.
- Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
- rememberInstruction(I);
- return I;
+ if (!Ret)
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(SE.DT->dominates(Ret, BIP));
+
+ rememberInstruction(Ret);
+ return Ret;
}
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
@@ -73,9 +95,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
"InsertNoopCastOfTo cannot change sizes!");
// Short-circuit unnecessary bitcasts.
- if (Op == Instruction::BitCast && V->getType() == Ty)
- return V;
-
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
// Short-circuit unnecessary inttoptr<->ptrtoint casts.
if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
@@ -115,8 +142,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
BasicBlock::iterator IP = I; ++IP;
if (InvokeInst *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
- isa<LandingPadInst>(IP))
+ while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
++IP;
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -492,6 +518,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
V = InsertNoopCastOfTo(V,
Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+ assert(!isa<Instruction>(V) ||
+ SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+
// Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -588,20 +617,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
return expand(SE.getAddExpr(Ops));
}
-/// isNonConstantNegative - Return true if the specified scev is negated, but
-/// not a constant.
-static bool isNonConstantNegative(const SCEV *F) {
- const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
- if (!Mul) return false;
-
- // If there is a constant factor, it will be first.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
- if (!SC) return false;
-
- // Return true if the value is negative, this matches things like (-42 * V).
- return SC->getValue()->getValue().isNegative();
-}
-
/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
@@ -655,7 +670,6 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return RelevantLoops[D] = Result;
}
llvm_unreachable("Unexpected SCEV type!");
- return 0;
}
namespace {
@@ -680,10 +694,10 @@ public:
// If one operand is a non-constant negative and the other is not,
// put the non-constant negative on the right so that a sub can
// be used instead of a negate and add.
- if (isNonConstantNegative(LHS.second)) {
- if (!isNonConstantNegative(RHS.second))
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
return false;
- } else if (isNonConstantNegative(RHS.second))
+ } else if (RHS.second->isNonConstantNegative())
return true;
// Otherwise they are equivalent according to this comparison.
@@ -744,7 +758,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
for (++I; I != E && I->first == CurLoop; ++I)
NewOps.push_back(I->second);
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
- } else if (isNonConstantNegative(Op)) {
+ } else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
Sum = InsertNoopCastOfTo(Sum, Ty);
@@ -875,58 +889,138 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
return isNormalAddRecExprPHI(PN, IncV, L);
}
-/// Determine if this cyclic phi is in a form that would have been generated by
-/// LSR. We don't care if the phi was actually expanded in this pass, as long
-/// as it is in a low-cost form, for example, no implied multiplication. This
-/// should match any patterns generated by getAddRecExprPHILiterally and
-/// expandAddtoGEP.
-bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
- const Loop *L) {
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the non-IV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return NULL;
+
switch (IncV->getOpcode()) {
+ default:
+ return NULL;
// Check for a simple Add/Sub or GEP of a loop invariant step.
case Instruction::Add:
- case Instruction::Sub:
- return IncV->getOperand(0) == PN
- && L->isLoopInvariant(IncV->getOperand(1));
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT->dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return NULL;
+ }
case Instruction::BitCast:
- IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
- if (!IncV)
- return false;
- // fall-thru to GEP handling
- case Instruction::GetElementPtr: {
- // This must be a pointer addition of constants (pretty) or some number of
- // address-size elements (ugly).
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
I != E; ++I) {
if (isa<Constant>(*I))
continue;
- // ugly geps have 2 operands.
- // i1* is used by the expander to represent an address-size element.
+ if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (!SE.DT->dominates(OInst, InsertPos))
+ return NULL;
+ }
+ if (allowScale) {
+ // allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
if (IncV->getNumOperands() != 2)
- return false;
+ return NULL;
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
- return false;
- // Ensure the operands dominate the insertion point. I don't know of a
- // case when this would not be true, so this is somewhat untested.
- if (L == IVIncInsertLoop) {
- for (User::op_iterator OI = IncV->op_begin()+1,
- OE = IncV->op_end(); OI != OE; ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos))
- return false;
- }
+ return NULL;
break;
}
- IncV = dyn_cast<Instruction>(IncV->getOperand(0));
- if (IncV && IncV->getOpcode() == Instruction::BitCast)
- IncV = dyn_cast<Instruction>(IncV->getOperand(0));
- return IncV == PN;
+ return dyn_cast<Instruction>(IncV->getOperand(0));
}
- default:
+}
+
+/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+ if (SE.DT->dominates(IncV, InsertPos))
+ return true;
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for(;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT->dominates(IncV, InsertPos))
+ break;
+ }
+ for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
+ E = IVIncs.rend(); I != E; ++I) {
+ (*I)->moveBefore(InsertPos);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for(Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
}
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
}
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
@@ -956,26 +1050,28 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(PN, IncV, L))
continue;
+ if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+ continue;
}
else {
if (!isNormalAddRecExprPHI(PN, IncV, L))
continue;
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
}
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
// Remember the increment.
rememberInstruction(IncV);
- if (L == IVIncInsertLoop)
- do {
- if (SE.DT->dominates(IncV, IVIncInsertPos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- IncV->moveBefore(IVIncInsertPos);
- IVIncInsertPos = IncV;
- IncV = cast<Instruction>(IncV->getOperand(0));
- } while (IncV != PN);
return PN;
}
}
@@ -984,6 +1080,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+  // SavedPostIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
// Expand code for the start value.
Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
L->getHeader()->begin());
@@ -993,16 +1099,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
- // Expand code for the step value. Insert instructions right before the
- // terminator corresponding to the back-edge. Do this before creating the PHI
- // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
- // negative, insert a sub instead of an add for the increment (unless it's a
- // constant, because subtracts of constants are canonicalized to adds).
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = ExpandTy->isPointerTy();
- bool isNegative = !isPointer && isNonConstantNegative(Step);
- if (isNegative)
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
// Create the PHI.
@@ -1023,33 +1129,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
}
- // Create a step value and add it to the PHI. If IVIncInsertLoop is
- // non-null and equal to the addrec's loop, insert the instructions
- // at IVIncInsertPos.
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV;
- // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (isPointer) {
- PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
- // If the step isn't constant, don't use an implicitly scaled GEP, because
- // that would require a multiply inside the loop.
- if (!isa<ConstantInt>(StepV))
- GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
- GEPPtrTy->getAddressSpace());
- const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
- IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
- IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
- } else {
- IncV = isNegative ?
- Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
- Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
- }
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
PN->addIncoming(IncV, Pred);
}
@@ -1057,6 +1144,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
@@ -1124,10 +1215,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- assert((!isa<Instruction>(Result) ||
- SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) &&
- "postinc expansion does not dominate use");
+ if (isa<Instruction>(Result)
+ && !SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+    // all cases. Consider a phi outside the loop whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // Restore the insertion point to the place where the caller has
+ // determined dominates all uses.
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
}
// Re-apply any non-loop-dominating scale.
@@ -1363,10 +1475,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
}
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
- Instruction *I) {
- BasicBlock::iterator IP = I;
- while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
- ++IP;
+ Instruction *IP) {
Builder.SetInsertPoint(IP->getParent(), IP);
return expandCodeFor(SH, Ty);
}
@@ -1392,14 +1501,23 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
+ else {
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstInsertionPt();
- while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+ while (InsertPt != Builder.GetInsertPoint()
+ && (isInsertedInstruction(InsertPt)
+ || isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ }
break;
}
@@ -1434,23 +1552,9 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedPostIncValues.insert(I);
else
InsertedValues.insert(I);
-
- // If we just claimed an existing instruction and that instruction had
- // been the insert point, adjust the insert point forward so that
- // subsequently inserted code will be dominated.
- if (Builder.GetInsertPoint() == I) {
- BasicBlock::iterator It = cast<Instruction>(I);
- do { ++It; } while (isInsertedInstruction(It) ||
- isa<DbgInfoIntrinsic>(It));
- Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
- }
}
void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- // If we acquired more instructions since the old insert point was saved,
- // advance past them.
- while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
-
Builder.SetInsertPoint(BB, I);
}
@@ -1478,40 +1582,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
return V;
}
-/// hoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-///
-/// This does not require a SCEVExpander instance and could be replaced by a
-/// general code-insertion helper.
-bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT) {
- if (DT->dominates(IncV, InsertPos))
- return true;
-
- if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
- return false;
-
- if (IncV->mayHaveSideEffects())
- return false;
-
- // Attempt to hoist IncV
- for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
- OI != OE; ++OI) {
- Instruction *OInst = dyn_cast<Instruction>(OI);
- if (OInst && !DT->dominates(OInst, InsertPos))
- return false;
- }
- IncV->moveBefore(InsertPos);
- return true;
+/// Sort values by integer width for replaceCongruentIVs.
+static bool width_descending(Value *lhs, Value *rhs) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
+ return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
+ return rhs->getType()->getPrimitiveSizeInBits()
+ < lhs->getType()->getPrimitiveSizeInBits();
}
/// replaceCongruentIVs - Check for congruent phis in this loop header and
@@ -1521,23 +1598,45 @@ bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetLowering *TLI) {
+ // Find integer phis in order of increasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ Phis.push_back(Phi);
+ }
+ if (TLI)
+ std::sort(Phis.begin(), Phis.end(), width_descending);
+
unsigned NumElim = 0;
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- PHINode *Phi = cast<PHINode>(I);
+  // Process phis from wide to narrow. Map wide phis to their truncations
+  // so narrow phis can reuse them.
+ for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
+ PEnd = Phis.end(); PIter != PEnd; ++PIter) {
+ PHINode *Phi = *PIter;
+
if (!SE.isSCEVable(Phi->getType()))
continue;
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TLI
+ && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
continue;
}
- // If one phi derives from the other via GEPs, types may differ.
- // We could consider adding a bitcast here to handle it.
- if (OrigPhiRef->getType() != Phi->getType())
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
continue;
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
@@ -1546,32 +1645,56 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Instruction *IsomorphicInc =
cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
- // If this phi is more canonical, swap it with the original.
- if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
- && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType()
+ && !(ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
+ && (ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
std::swap(OrigPhiRef, Phi);
std::swap(OrigInc, IsomorphicInc);
}
// Replacing the congruent phi is sufficient because acyclic redundancy
// elimination, CSE/GVN, should handle the rest. However, once SCEV proves
// that a phi is congruent, it's often the head of an IV user cycle that
- // is isomorphic with the original phi. So it's worth eagerly cleaning up
- // the common case of a single IV increment.
- if (OrigInc != IsomorphicInc &&
- OrigInc->getType() == IsomorphicInc->getType() &&
- SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) &&
- hoistStep(OrigInc, IsomorphicInc, DT)) {
+ // is isomorphic with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs can remove
+ // cycles that had postinc uses.
+ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
+ IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc
+ && TruncExpr == SE.getSCEV(IsomorphicInc)
+ && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
+ || hoistIVInc(OrigInc, IsomorphicInc))) {
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated congruent iv.inc: "
<< *IsomorphicInc << '\n');
- IsomorphicInc->replaceAllUsesWith(OrigInc);
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = isa<PHINode>(OrigInc)
+ ? (Instruction*)L->getHeader()->getFirstInsertionPt()
+ : OrigInc->getNextNode();
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
DeadInsts.push_back(IsomorphicInc);
}
}
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
++NumElim;
- Phi->replaceAllUsesWith(OrigPhiRef);
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
DeadInsts.push_back(Phi);
}
return NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index c66ecd6e8727..dd2ed4ff831c 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -118,7 +118,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
// Conservatively use AnyWrap until/unless we need FlagNW.
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
- default: llvm_unreachable("Unexpected transform name!");
case NormalizeAutodetect:
if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
const SCEV *TransformedStep =
@@ -191,7 +190,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
}
llvm_unreachable("Unexpected SCEV kind!");
- return 0;
}
/// Manage recursive transformation across an expression DAG. Revisiting
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index d8c207b4bd49..c819666ee444 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -194,8 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
- Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+ SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ Succs[Case.getSuccessorIndex()] = true;
}
@@ -327,13 +327,13 @@ void SparseSolver::Solve(Function &F) {
}
void SparseSolver::Print(Function &F, raw_ostream &OS) const {
- OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ OS << "\nFUNCTION: " << F.getName() << "\n";
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
OS << "INFEASIBLE: ";
OS << "\t";
if (BB->hasName())
- OS << BB->getNameStr() << ":\n";
+ OS << BB->getName() << ":\n";
else
OS << "; anon bb\n";
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 68a39cd581f4..ff5010bad7bb 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -34,7 +34,7 @@ Module *Trace::getModule() const {
///
void Trace::print(raw_ostream &O) const {
Function *F = getFunction();
- O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+ O << "; Trace from function " << F->getName() << ", blocks:\n";
for (const_iterator i = begin(), e = end(); i != e; ++i) {
O << "; ";
WriteAsOperand(O, *i, true, getModule());
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4d94f619fda1..a430f6281ef0 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -20,8 +20,10 @@
#include "llvm/GlobalAlias.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,10 +43,176 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
return TD ? TD->getPointerSizeInBits() : 0;
}
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
+static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ if (!Add) {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (!KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (NSW) {
+ if (Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+}
+
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, TD, Depth)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, TD, Depth));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
+
+void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ // Use the high end of the ranges to find leading zeros.
+ unsigned MinLeadingZeros = BitWidth;
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.isWrappedSet())
+ MinLeadingZeros = 0; // -1 has no zeros
+ unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
+ MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
+ }
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+}
+/// ComputeMaskedBits - Determine which of the bits are known to be either zero
+/// or one and return them in the KnownZero/KnownOne bit sets.
+///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
@@ -54,67 +222,75 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
///
/// This function is defined on values with integer type, values with pointer
/// type (but only if TD is non-null), and vectors of integers. In the case
-/// where V is a vector, the mask, known zero, and known one values are the
+/// where V is a vector, the known zero and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
+void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
const TargetData *TD, unsigned Depth) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
- unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
- && "Not integer or pointer type!");
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
+ KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
- KnownOne = CI->getValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = CI->getValue();
+ KnownZero = ~KnownOne;
return;
}
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) ||
isa<ConstantAggregateZero>(V)) {
KnownOne.clearAllBits();
- KnownZero = Mask;
+ KnownZero = APInt::getAllOnesValue(BitWidth);
return;
}
// Handle a constant vector by taking the intersection of the known bits of
- // each element.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ // each element. There is no real need to handle ConstantVector here, because
+ // we don't handle undef in any particularly useful way.
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ // We know that CDS must be a vector of integers. Take the intersection of
+ // each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
- ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
- TD, Depth);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ APInt Elt(KnownZero.getBitWidth(), 0);
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ Elt = CDS->getElementAsInteger(i);
+ KnownZero &= ~Elt;
+ KnownOne &= Elt;
}
return;
}
+
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
- Type *ObjectType = GV->getType()->getElementType();
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GV->isDeclaration() && !GV->mayBeOverridden())
- Align = TD->getPrefTypeAlignment(ObjectType);
- else
- Align = TD->getABITypeAlignment(ObjectType);
+ if (Align == 0 && TD) {
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ }
}
if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
else
KnownZero.clearAllBits();
KnownOne.clearAllBits();
@@ -126,8 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (GA->mayBeOverridden()) {
KnownZero.clearAllBits(); KnownOne.clearAllBits();
} else {
- ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
- TD, Depth+1);
+ ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
}
return;
}
@@ -136,15 +311,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Get alignment information off byval arguments if specified in the IR.
if (A->hasByValAttr())
if (unsigned Align = A->getParamAlignment())
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
return;
}
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
- if (Depth == MaxDepth || Mask == 0)
+ if (Depth == MaxDepth)
return; // Limit search depth.
Operator *I = dyn_cast<Operator>(V);
@@ -153,12 +328,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
switch (I->getOpcode()) {
default: break;
+ case Instruction::Load:
+ if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ computeMaskedBitsLoad(*MD, KnownZero);
+ return;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- APInt Mask2(Mask & ~KnownZero);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -169,10 +346,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- APInt Mask2(Mask & ~KnownOne);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -183,9 +358,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -197,55 +371,32 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
- BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0),
- AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(I->getOperand(1),
- AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -278,11 +429,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
else
SrcBitWidth = SrcTy->getScalarSizeInBits();
- APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -296,8 +445,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
return;
}
break;
@@ -306,11 +454,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- APInt MaskIn = Mask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -327,9 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt Mask2(Mask.lshr(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
@@ -344,9 +488,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -362,9 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -378,100 +518,25 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
break;
case Instruction::Sub: {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
- TD, Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- }
- }
- }
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
}
- // fall through
case Instruction::Add: {
- // If one of the operands has trailing zeros, then the bits that the
- // other operand has in those bit positions will be preserved in the
- // result. For an add, this works with either operand. For a subtract,
- // this only works if the known zeros are in the right operand.
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
- assert((LHSKnownZero & LHSKnownOne) == 0 &&
- "Bits known to be one AND zero?");
- unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
- // Determine which operand has more trailing zeros, and use that
- // many bits from the other operand.
- if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (I->getOpcode() == Instruction::Add) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
- } else {
- // If the known zeros are in the left operand for a subtract,
- // fall back to the minimum known zeros in both operands.
- KnownZero |= APInt::getLowBitsSet(BitWidth,
- std::min(LHSKnownZeroOut,
- RHSKnownZeroOut));
- }
- } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
- KnownZero |= LHSKnownZero & Mask;
- KnownOne |= LHSKnownOne & Mask;
- }
-
- // Are we still trying to solve for the sign bit?
- if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
- if (OBO->hasNoSignedWrap()) {
- if (I->getOpcode() == Instruction::Add) {
- // Adding two positive numbers can't wrap into negative
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // and adding two negative numbers can't wrap into positive.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- } else {
- // Subtracting a negative number from a positive one can't wrap
- if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // neither can subtracting a positive number from a negative one.
- else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- }
- }
- }
-
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
}
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -487,19 +552,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (Mask.isNegative() && KnownZero.isNonNegative()) {
- APInt Mask2 = APInt::getSignBit(BitWidth);
+ if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -512,27 +573,24 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD,
Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~LowBits;
+ KnownOne &= LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
- TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
- TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
@@ -543,17 +601,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
break;
}
case Instruction::GetElementPtr: {
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
- APInt LocalMask = APInt::getAllOnesValue(BitWidth);
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LocalMask,
- LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
+ Depth+1);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -573,17 +629,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (!IndexedTy->isSized()) return;
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
- LocalMask = APInt::getAllOnesValue(GEPOpiBits);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- ComputeMaskedBits(Index, LocalMask,
- LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
TrailZ = std::min(TrailZ,
unsigned(CountTrailingZeros_64(TypeSize) +
LocalKnownZero.countTrailingOnes()));
}
}
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
break;
}
case Instruction::PHI: {
@@ -618,17 +672,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
- Mask2 = APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
+ ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+ ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1);
- KnownZero = Mask &
- APInt::getLowBitsSet(BitWidth,
+ KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
KnownZero3.countTrailingOnes()));
break;
@@ -657,8 +707,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
- KnownZero2, KnownOne2, TD, MaxDepth-1);
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
+ MaxDepth-1);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -673,10 +723,17 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz: {
unsigned LowBits = Log2_32(BitWidth)+1;
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ LowBits -= 1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::ctpop: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
@@ -687,6 +744,34 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
}
break;
+ case Instruction::ExtractValue:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+ ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ if (EVI->getNumIndices() != 1) break;
+ if (EVI->getIndices()[0] == 0) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
+ break;
+ }
+ }
+ }
}
}
@@ -702,8 +787,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
- Depth);
+ ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
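// [Editorial sketch; illustrative only, not part of the imported r154661 diff.]
// Throughout this import ComputeMaskedBits loses its APInt mask parameter:
// callers now compute all known bits and apply any mask to the result
// themselves. A minimal hypothetical caller, assuming the post-change
// declaration in llvm/Analysis/ValueTracking.h (TD and Depth defaulted):
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical helper: true if the low bit of V is known to be zero.
static bool lowBitKnownZero(Value *V, unsigned BitWidth, const TargetData *TD) {
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  ComputeMaskedBits(V, KnownZero, KnownOne, TD);  // no mask argument any more
  return KnownZero[0];
}
// [End of editorial sketch.]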
@@ -712,10 +796,15 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return CI->getValue().isPowerOf2();
- // TODO: Handle vector constants.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero,
+ unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return OrZero;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+ }
// 1 << X is clearly a power of two if the one is not shifted off the end. If
// it is shifted off the end then the result is undefined.
@@ -731,21 +820,36 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
if (Depth++ == MaxDepth)
return false;
+ Value *X = 0, *Y = 0;
+ // A shift of a power of two is a power of two or zero.
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
+ match(V, m_Shr(m_Value(X), m_Value()))))
+ return isPowerOfTwo(X, TD, /*OrZero*/true, Depth);
+
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+ return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
- isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+ return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth);
+
+ if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
+ // A power of two and'd with anything is a power of two or zero.
+ if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) ||
+ isPowerOfTwo(Y, TD, /*OrZero*/true, Depth))
+ return true;
+ // X & (-X) is always a power of two or zero.
+ if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
+ return true;
+ return false;
+ }
// An exact divide or right shift can only shift off zero bits, so the result
// is a power of two only if the first operand is a power of two and not
// copying a sign bit (sdiv int_min, 2).
- if (match(V, m_LShr(m_Value(), m_Value())) ||
- match(V, m_UDiv(m_Value(), m_Value()))) {
- PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
- if (PEO->isExact())
- return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+ if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
+ match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
+ return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth);
}
return false;
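// [Editorial sketch; illustrative only, not part of the imported r154661 diff.]
// isPowerOfTwo gains an explicit OrZero flag in this hunk. Passing true
// accepts values that are a power of two or zero (e.g. shifts that may move
// the single set bit out of range); false keeps the old strict behaviour.
// A minimal hypothetical caller:
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical helper wrapping the new four-argument form.
static bool isPow2OrZero(Value *X, const TargetData *TD) {
  return isPowerOfTwo(X, TD, /*OrZero=*/true, /*Depth=*/0);
}
// [End of editorial sketch.]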
@@ -767,7 +871,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxDepth)
+ if (Depth++ >= MaxDepth)
return false;
unsigned BitWidth = getBitWidth(V->getType(), TD);
@@ -785,13 +889,13 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
// if the lowest bit is shifted off the end.
if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
- BinaryOperator *BO = cast<BinaryOperator>(V);
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, TD, Depth);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
if (KnownOne[0])
return true;
}
@@ -799,7 +903,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
// defined if the sign bit is shifted off the end.
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
// shr exact can only shift out zero bits.
- BinaryOperator *BO = cast<BinaryOperator>(V);
+ PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
return isKnownNonZero(X, TD, Depth);
@@ -809,10 +913,8 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
return true;
}
// div exact can only produce a zero if the dividend is zero.
- else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
- BinaryOperator *BO = cast<BinaryOperator>(V);
- if (BO->isExact())
- return isKnownNonZero(X, TD, Depth);
+ else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
+ return isKnownNonZero(X, TD, Depth);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
@@ -835,20 +937,29 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth))
return true;
- if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth))
+ return true;
+ }
+ // X * Y.
+ else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ // If X and Y are non-zero then so is X * Y as long as the multiplication
+ // does not overflow.
+ if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
@@ -861,8 +972,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
- TD, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
return KnownOne != 0;
}
@@ -878,7 +988,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
const TargetData *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -917,30 +1027,28 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
- case Instruction::AShr:
+ case Instruction::AShr: {
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
- // ashr X, C -> adds C sign bits.
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- Tmp += C->getZExtValue();
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ Tmp += ShAmt->getZExtValue();
if (Tmp > TyBits) Tmp = TyBits;
}
- // vector ashr X, <C, C, C, C> -> adds C sign bits
- if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
- Tmp += CI->getZExtValue();
- if (Tmp > TyBits) Tmp = TyBits;
- }
- }
return Tmp;
- case Instruction::Shl:
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ }
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
- if (C->getZExtValue() >= TyBits || // Bad shift.
- C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
- return Tmp - C->getZExtValue();
+ Tmp2 = ShAmt->getZExtValue();
+ if (Tmp2 >= TyBits || // Bad shift.
+ Tmp2 >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - Tmp2;
}
break;
+ }
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
@@ -971,13 +1079,11 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
@@ -998,12 +1104,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
- TD, Depth+1);
+ ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -1045,8 +1149,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ APInt Mask;
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -1282,23 +1386,21 @@ Value *llvm::isBytewiseValue(Value *V) {
}
}
- // A ConstantArray is splatable if all its members are equal and also
- // splatable.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
- if (CA->getNumOperands() == 0)
- return 0;
-
- Value *Val = isBytewiseValue(CA->getOperand(0));
+ // A ConstantDataArray/Vector is splatable if all its members are equal and
+ // also splatable.
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
+ Value *Elt = CA->getElementAsConstant(0);
+ Value *Val = isBytewiseValue(Elt);
if (!Val)
return 0;
- for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
- if (CA->getOperand(I-1) != CA->getOperand(I))
+ for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
+ if (CA->getElementAsConstant(I) != Elt)
return 0;
return Val;
}
-
+
// Conceptually, we could handle things like:
// %a = zext i8 %X to i16
// %b = shl i16 %a, 8
@@ -1395,50 +1497,44 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
- // recursion)
+ // recursion).
if (idx_range.empty())
return V;
- // We have indices, so V should have an indexable type
- assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
- && "Not looking at a struct or array?");
- assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
- && "Invalid indices for type?");
- CompositeType *PTy = cast<CompositeType>(V->getType());
-
- if (isa<UndefValue>(V))
- return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
- idx_range));
- else if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
- idx_range));
- else if (Constant *C = dyn_cast<Constant>(V)) {
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
- // Recursively process this constant
- return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
- InsertBefore);
- } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // We have indices, so V should have an indexable type.
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
+ "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
+ "Invalid indices for type?");
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = C->getAggregateElement(idx_range[0]);
+ if (C == 0) return 0;
+ return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
+ }
+
+ if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
const unsigned *req_idx = idx_range.begin();
for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
i != e; ++i, ++req_idx) {
if (req_idx == idx_range.end()) {
- if (InsertBefore)
- // The requested index identifies a part of a nested aggregate. Handle
- // this specially. For example,
- // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
- // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
- // %C = extractvalue {i32, { i32, i32 } } %B, 1
- // This can be changed into
- // %A = insertvalue {i32, i32 } undef, i32 10, 0
- // %C = insertvalue {i32, i32 } %A, i32 11, 1
- // which allows the unused 0,0 element from the nested struct to be
- // removed.
- return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
- InsertBefore);
- else
- // We can't handle this without inserting insertvalues
+ // We can't handle this without inserting insertvalues
+ if (!InsertBefore)
return 0;
+
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
+ InsertBefore);
}
// This insert value inserts something else than what we are looking for.
@@ -1454,7 +1550,9 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return FindInsertedValue(I->getInsertedValueOperand(),
makeArrayRef(req_idx, idx_range.end()),
InsertBefore);
- } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+ }
+
+ if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
// However, we will need to chain I's indices with the requested indices.
@@ -1486,7 +1584,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData &TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
+ if (PtrOp == 0 || Ptr->getType()->isVectorTy())
+ return Ptr;
// Just look through bitcasts.
if (PtrOp->getOpcode() == Instruction::BitCast)
@@ -1521,34 +1620,19 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
-/// GetConstantStringInfo - This function computes the length of a
+/// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false.
-bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
- uint64_t Offset,
- bool StopAtNul) {
- // If V is NULL then return false;
- if (V == NULL) return false;
-
- // Look through bitcast instructions.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return false because ConstantArray can't occur
- // any other way
- const User *GEP = 0;
- if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::BitCast)
- return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return false;
- GEP = CE;
- }
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ assert(V);
+
+ // Look through bitcast instructions and geps.
+ V = V->stripPointerCasts();
- if (GEP) {
+ // If the value is a GEP instruction or constant expression, treat it as an
+ // offset.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
@@ -1573,51 +1657,48 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
- StopAtNul);
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
}
-
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
- const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- const Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case
- if (isa<ConstantAggregateZero>(GlobalInit)) {
+
+ // Handle the all-zeros case
+ if (GV->getInitializer()->isNullValue()) {
// This is a degenerate case. The initializer is constant zero so the
// length of the string must be zero.
- Str.clear();
+ Str = "";
return true;
}
// Must be a Constant Array
- const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+ const ConstantDataArray *Array =
+ dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (Array == 0 || !Array->isString())
return false;
// Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
+ uint64_t NumElts = Array->getType()->getArrayNumElements();
+
+ // Start out with the entire array in the StringRef.
+ Str = Array->getAsString();
+
if (Offset > NumElts)
return false;
- // Traverse the constant array from 'Offset' which is the place the GEP refers
- // to in the array.
- Str.reserve(NumElts-Offset);
- for (unsigned i = Offset; i != NumElts; ++i) {
- const Constant *Elt = Array->getOperand(i);
- const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return false;
- if (StopAtNul && CI->isZero())
- return true; // we found end of string, success!
- Str += (char)CI->getZExtValue();
- }
+ // Skip over 'offset' bytes.
+ Str = Str.substr(Offset);
- // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ if (TrimAtNul) {
+ // Trim off the \0 and anything after it. If the array is not nul
+ // terminated, we just return the whole remainder of the string. The client may know
+ // some other way that the string is length-bound.
+ Str = Str.substr(0, Str.find('\0'));
+ }
return true;
}
@@ -1629,8 +1710,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
// Look through noop bitcast instructions.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- return GetStringLengthH(BCI->getOperand(0), PHIs);
+ V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
@@ -1666,75 +1746,13 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (Len1 != Len2) return 0;
return Len1;
}
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return unknown.
- User *GEP = 0;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return 0;
- GEP = CE;
- } else {
- return 0;
- }
-
- // Make sure the GEP has exactly three arguments.
- if (GEP->getNumOperands() != 3)
- return 0;
-
- // Check to make sure that the first operand of the GEP is an integer and
- // has value 0 so that we are sure we're indexing into the initializer.
- if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
- if (!Idx->isZero())
- return 0;
- } else
- return 0;
-
- // If the second index isn't a ConstantInt, then this is a variable index
- // into the array. If this occurs, we can't say anything meaningful about
- // the string.
- uint64_t StartIdx = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
- StartIdx = CI->getZExtValue();
- else
- return 0;
-
- // The GEP instruction, constant or instruction, must reference a global
- // variable that is a constant and is initialized. The referenced constant
- // initializer is the array that we'll use for optimization.
- GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
- GV->mayBeOverridden())
+
+ // Otherwise, see if we can read the string.
+ StringRef StrData;
+ if (!getConstantStringInfo(V, StrData))
return 0;
- Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case, which is a degenerate case. The
- // initializer is constant zero so the length of the string must be zero.
- if (isa<ConstantAggregateZero>(GlobalInit))
- return 1; // Len = 0 offset by 1.
-
- // Must be a Constant Array
- ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
- return false;
-
- // Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
- // Traverse the constant array from StartIdx (derived above) which is
- // the place the GEP refers to in the array.
- for (unsigned i = StartIdx; i != NumElts; ++i) {
- Constant *Elt = Array->getOperand(i);
- ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return 0;
- if (CI->isZero())
- return i-StartIdx+1; // We found end of string, success!
- }
- return 0; // The array isn't null terminated, conservatively return 'unknown'.
+ return StrData.size()+1;
}
/// GetStringLength - If we can compute the length of the string pointed to by
@@ -1793,3 +1811,94 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
}
return true;
}
+
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+ const TargetData *TD) {
+ const Operator *Inst = dyn_cast<Operator>(V);
+ if (!Inst)
+ return false;
+
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ Value *Op = Inst->getOperand(1);
+ // x / y is undefined if y == 0
+ if (!isKnownNonZero(Op, TD))
+ return false;
+ // x / y might be undefined if y == -1
+ unsigned BitWidth = getBitWidth(Op->getType(), TD);
+ if (BitWidth == 0)
+ return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+ return !!KnownZero;
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects, and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
+ return false; // The called function could have undefined behavior or
+ // side-effects, even if marked readnone nounwind.
+ }
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::LandingPad:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Resume:
+ return false; // Misc instructions which have effects
+ }
+}
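// [Editorial sketch; illustrative only, not part of the imported r154661 diff.]
// GetConstantStringInfo(std::string&) becomes the StringRef-based
// getConstantStringInfo above, so callers read the constant data in place
// instead of copying it. A hypothetical caller, assuming the two-argument
// form (Offset and TrimAtNul defaulted) used by GetStringLengthH in this diff:
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical helper: does V point to the given constant C string?
static bool stringOperandEquals(const Value *V, StringRef Expected) {
  StringRef Str;
  if (!getConstantStringInfo(V, Str))
    return false;
  return Str == Expected;
}
// [End of editorial sketch.]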
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index eef6fe0b1c1d..68873e2768d3 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "ArchiveInternals.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Module.h"
+#include <cstdio>
#include <cstdlib>
#include <memory>
using namespace llvm;
@@ -504,7 +506,7 @@ Archive::findModuleDefiningSymbol(const std::string& symbol,
// Modules that define those symbols.
bool
Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
- std::set<Module*>& result,
+ SmallVectorImpl<Module*>& result,
std::string* error) {
if (!mapfile || !base) {
if (error)
@@ -569,21 +571,26 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
// At this point we have a valid symbol table (one way or another) so we
// just use it to quickly find the symbols requested.
+ SmallPtrSet<Module*, 16> Added;
for (std::set<std::string>::iterator I=symbols.begin(),
- E=symbols.end(); I != E;) {
+ Next = I,
+ E=symbols.end(); I != E; I = Next) {
+ // Increment Next before we invalidate it.
+ ++Next;
+
// See if this symbol exists
Module* m = findModuleDefiningSymbol(*I,error);
- if (m) {
- // The symbol exists, insert the Module into our result, duplicates will
- // be ignored.
- result.insert(m);
-
- // Remove the symbol now that its been resolved, being careful to
- // post-increment the iterator.
- symbols.erase(I++);
- } else {
- ++I;
- }
+ if (!m)
+ continue;
+ bool NewMember = Added.insert(m);
+ if (!NewMember)
+ continue;
+
+ // The symbol exists, insert the Module into our result.
+ result.push_back(m);
+
+ // Remove the symbol now that it's been resolved.
+ symbols.erase(I);
}
return true;
}
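// [Editorial sketch; illustrative only, not part of the imported r154661 diff.]
// findModulesDefiningSymbols now fills a SmallVectorImpl instead of a
// std::set; duplicates are filtered internally with a SmallPtrSet and resolved
// symbols are still erased from the input set. A hypothetical caller, assuming
// the Archive class declared in llvm/Bitcode/Archive.h:
#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/Archive.h"
#include <set>
#include <string>
using namespace llvm;

// Hypothetical helper: gather the defining modules in discovery order.
static bool collectDefiningModules(Archive &A, std::set<std::string> &Syms,
                                   SmallVectorImpl<Module*> &Mods) {
  std::string Err;
  return A.findModulesDefiningSymbols(Syms, Mods, &Err);
}
// [End of editorial sketch.]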
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 8fcc7aa29cc8..9ef29432ddf2 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
- sys::LLVMFileType type;
+ sys::fs::file_magic type;
if (sys::fs::identify_magic(mbr->path.str(), type))
- type = sys::Unknown_FileType;
+ type = sys::fs::file_magic::unknown;
switch (type) {
- case sys::Bitcode_FileType:
+ case sys::fs::file_magic::bitcode:
flags |= ArchiveMember::BitcodeFlag;
break;
default:
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index b52974e0753d..7ff478a41a59 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -3,9 +3,3 @@ add_llvm_library(LLVMArchive
ArchiveReader.cpp
ArchiveWriter.cpp
)
-
-add_llvm_library_dependencies(LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt
new file mode 100644
index 000000000000..d68550b45fe8
--- /dev/null
+++ b/lib/Archive/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Archive/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Archive
+parent = Libraries
+required_libraries = BitReader Core Support
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
index 749601510b5b..985ebe200988 100644
--- a/lib/AsmParser/CMakeLists.txt
+++ b/lib/AsmParser/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMAsmParser
LLParser.cpp
Parser.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmParser
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index d0dd98627bab..8818168f643d 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
- ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
+ ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
return true;
}
@@ -55,18 +55,22 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
return Result;
}
+static char parseHexChar(char C) {
+ if (C >= '0' && C <= '9')
+ return C-'0';
+ if (C >= 'A' && C <= 'F')
+ return C-'A'+10;
+ if (C >= 'a' && C <= 'f')
+ return C-'a'+10;
+ return 0;
+}
+
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
uint64_t Result = 0;
for (; Buffer != End; ++Buffer) {
uint64_t OldRes = Result;
Result *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Result += C-'0';
- else if (C >= 'A' && C <= 'F')
- Result += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Result += C-'a'+10;
+ Result += parseHexChar(*Buffer);
if (Result < OldRes) { // Uh, oh, overflow detected!!!
Error("constant bigger than 64 bits detected!");
@@ -82,24 +86,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<16; i++, Buffer++) {
assert(Buffer != End);
Pair[0] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[0] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[0] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[0] += C-'a'+10;
+ Pair[0] += parseHexChar(*Buffer);
}
Pair[1] = 0;
for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
Pair[1] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[1] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[1] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[1] += C-'a'+10;
+ Pair[1] += parseHexChar(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -113,24 +105,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
assert(Buffer != End);
Pair[1] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[1] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[1] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[1] += C-'a'+10;
+ Pair[1] += parseHexChar(*Buffer);
}
Pair[0] = 0;
for (int i=0; i<16; i++, Buffer++) {
Pair[0] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[0] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[0] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[0] += C-'a'+10;
+ Pair[0] += parseHexChar(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -149,9 +129,7 @@ static void UnEscapeLexed(std::string &Str) {
*BOut++ = '\\'; // Two \ becomes one
BIn += 2;
} else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
- char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
- *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
- BIn[3] = Tmp; // Restore character
+ *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]);
BIn += 3; // Skip over handled chars
++BOut;
} else {
@@ -503,6 +481,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(tail);
KEYWORD(target);
KEYWORD(triple);
+ KEYWORD(unwind);
KEYWORD(deplibs);
KEYWORD(datalayout);
KEYWORD(volatile);
@@ -570,6 +549,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noimplicitfloat);
KEYWORD(naked);
KEYWORD(nonlazybind);
+ KEYWORD(address_safety);
KEYWORD(type);
KEYWORD(opaque);
@@ -596,6 +576,7 @@ lltok::Kind LLLexer::LexIdentifier() {
if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
TyVal = LLVMTY; return lltok::Type; }
TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("half", Type::getHalfTy(Context));
TYPEKEYWORD("float", Type::getFloatTy(Context));
TYPEKEYWORD("double", Type::getDoubleTy(Context));
TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
@@ -642,7 +623,6 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(indirectbr, IndirectBr);
INSTKEYWORD(invoke, Invoke);
INSTKEYWORD(resume, Resume);
- INSTKEYWORD(unwind, Unwind);
INSTKEYWORD(unreachable, Unreachable);
INSTKEYWORD(alloca, Alloca);
@@ -715,7 +695,7 @@ lltok::Kind LLLexer::Lex0x() {
if (Kind == 'J') {
// HexFPConstant - Floating point constant represented in IEEE format as a
// hexadecimal number for when exponential notation is not precise enough.
- // Float and double only.
+ // Half, Float, and double only.
APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
return lltok::APFloat;
}
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 33b913572375..09aea5b01825 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -42,7 +42,6 @@ namespace llvm {
APFloat APFloatVal;
APSInt APSIntVal;
- std::string TheError;
public:
explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cafaab01afd9..068be3d47c33 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(M);
return false;
}
@@ -879,7 +874,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
@@ -924,6 +919,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
+ case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break;
case lltok::kw_alignstack: {
unsigned Alignment;
@@ -1047,13 +1043,11 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
- if (ParseUInt32(ArbitraryCC)) {
+ if (ParseUInt32(ArbitraryCC))
return true;
- } else
- CC = static_cast<CallingConv::ID>(ArbitraryCC);
- return false;
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
}
- break;
}
Lex.Lex();
@@ -1069,7 +1063,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
- unsigned MDK = M->getMDKindID(Name.c_str());
+ unsigned MDK = M->getMDKindID(Name);
Lex.Lex();
MDNode *Node;
@@ -1358,8 +1352,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
Type *ArgTy = 0;
- unsigned ArgAttrs1 = Attribute::None;
- unsigned ArgAttrs2 = Attribute::None;
+ Attributes ArgAttrs1;
+ Attributes ArgAttrs2;
Value *V;
if (ParseType(ArgTy, ArgLoc))
return true;
@@ -1399,7 +1393,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
} else {
LocTy TypeLoc = Lex.getLoc();
Type *ArgTy = 0;
- unsigned Attrs;
+ Attributes Attrs;
std::string Name;
if (ParseType(ArgTy) ||
@@ -1466,7 +1460,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs != 0)
+ if (ArgList[i].Attrs)
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -1612,7 +1606,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc, "vector element type must be fp or integer");
+ return Error(TypeLoc,
+ "vector element type must be fp, integer or a pointer to these types");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1971,9 +1966,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return Error(ID.Loc, "constant vector must not be empty");
if (!Elts[0]->getType()->isIntegerTy() &&
- !Elts[0]->getType()->isFloatingPointTy())
+ !Elts[0]->getType()->isFloatingPointTy() &&
+ !Elts[0]->getType()->isPointerTy())
return Error(FirstEltLoc,
- "vector elements must have integer or floating point type");
+ "vector elements must have integer, pointer or floating point type");
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
@@ -2022,7 +2018,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
case lltok::kw_c: // c "foo"
Lex.Lex();
- ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
+ ID.ConstantVal = ConstantDataArray::getString(Context, Lex.getStrVal(),
+ false);
if (ParseToken(lltok::StringConstant, "expected string")) return true;
ID.Kind = ValID::t_Constant;
return false;
@@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->isPointerTy())
+ !Val0->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ if (Elts.size() == 0 ||
+ !Elts[0]->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -2440,7 +2438,6 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
- default: llvm_unreachable("Unknown ValID!");
case ValID::t_LocalID:
if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
@@ -2485,13 +2482,16 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
!ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
return Error(ID.Loc, "floating point constant invalid for type");
- // The lexer has no type info, so builds all float and double FP constants
- // as double. Fix this here. Long double does not need this.
- if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
- Ty->isFloatTy()) {
+ // The lexer has no type info, so builds all half, float, and double FP
+ // constants as double. Fix this here. Long double does not need this.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble) {
bool Ignored;
- ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
- &Ignored);
+ if (Ty->isHalfTy())
+ ID.APFloatVal.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ else if (Ty->isFloatTy())
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
}
V = ConstantFP::get(Context, ID.APFloatVal);
@@ -2549,6 +2549,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "constant expression type mismatch");
return false;
}
+ llvm_unreachable("Invalid ValID");
}
bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
@@ -2585,7 +2586,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
LocTy LinkageLoc = Lex.getLoc();
unsigned Linkage;
- unsigned Visibility, RetAttrs;
+ unsigned Visibility;
+ Attributes RetAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc = Lex.getLoc();
@@ -2649,7 +2651,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
- unsigned FuncAttrs;
+ Attributes FuncAttrs;
std::string Section;
unsigned Alignment;
std::string GC;
@@ -2835,7 +2837,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
}
switch (ParseInstruction(Inst, BB, PFS)) {
- default: assert(0 && "Unknown ParseInstruction result!");
+ default: llvm_unreachable("Unknown ParseInstruction result!");
case InstError: return true;
case InstNormal:
BB->getInstList().push_back(Inst);
@@ -2881,7 +2883,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
switch (Token) {
default: return Error(Loc, "expected instruction opcode");
// Terminator Instructions.
- case lltok::kw_unwind: Inst = new UnwindInst(Context); return false;
case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
case lltok::kw_br: return ParseBr(Inst, PFS);
@@ -2953,19 +2954,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_load: return ParseLoad(Inst, PFS, false);
- case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_load: return ParseLoad(Inst, PFS);
+ case lltok::kw_store: return ParseStore(Inst, PFS);
case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
case lltok::kw_fence: return ParseFence(Inst, PFS);
- case lltok::kw_volatile:
- // For compatibility; canonical location is after load
- if (EatIfPresent(lltok::kw_load))
- return ParseLoad(Inst, PFS, true);
- else if (EatIfPresent(lltok::kw_store))
- return ParseStore(Inst, PFS, true);
- else
- return TokError("expected 'load' or 'store'");
case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
@@ -3169,7 +3162,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
- unsigned RetAttrs, FnAttrs;
+ Attributes RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3342,7 +3335,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !LHS->getType()->isPointerTy())
+ !LHS->getType()->getScalarType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3462,7 +3455,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
return true;
if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
- return Error(Loc, "invalid extractelement operands");
+ return Error(Loc, "invalid shufflevector operands");
Inst = new ShuffleVectorInst(Op0, Op1, Op2);
return false;
@@ -3568,7 +3561,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
- unsigned RetAttrs, FnAttrs;
+ Attributes RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3689,10 +3682,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3701,15 +3691,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3736,10 +3723,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3748,15 +3732,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3902,13 +3883,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Ptr, *Val; LocTy Loc, EltLoc;
+ Value *Ptr = 0;
+ Value *Val = 0;
+ LocTy Loc, EltLoc;
bool InBounds = EatIfPresent(lltok::kw_inbounds);
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!Ptr->getType()->isPointerTy())
+ if (!Ptr->getType()->getScalarType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3919,11 +3902,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!Val->getType()->isIntegerTy())
+ if (!Val->getType()->getScalarType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
+ if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
+ return Error(EltLoc, "getelementptr index type missmatch");
+ if (Val->getType()->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(EltLoc,
+ "getelementptr vector index has a wrong number of elements");
+ }
Indices.push_back(Val);
}
+ if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
+ return Error(EltLoc, "vector getelementptrs must have a single index");
+
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index cbc3c23e8631..dda880838117 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -15,6 +15,7 @@
#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
+#include "llvm/Attributes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
@@ -171,7 +172,7 @@ namespace llvm {
return ParseUInt32(Val);
}
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
- bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+ bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind);
bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
bool ParseOptionalLinkage(unsigned &Linkage) {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
@@ -304,8 +305,8 @@ namespace llvm {
struct ParamInfo {
LocTy Loc;
Value *V;
- unsigned Attrs;
- ParamInfo(LocTy loc, Value *v, unsigned attrs)
+ Attributes Attrs;
+ ParamInfo(LocTy loc, Value *v, Attributes attrs)
: Loc(loc), V(v), Attrs(attrs) {}
};
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
@@ -325,9 +326,9 @@ namespace llvm {
struct ArgInfo {
LocTy Loc;
Type *Ty;
- unsigned Attrs;
+ Attributes Attrs;
std::string Name;
- ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+ ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N)
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
@@ -363,8 +364,8 @@ namespace llvm {
bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
- int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
- int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS);
int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
int ParseFence(Instruction *&I, PerFunctionState &PFS);
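// [Editorial sketch; illustrative only, not part of the imported r154661 diff.]
// The parser (and, further below, the bitcode reader) now carries attribute
// sets as the llvm::Attributes wrapper instead of a raw unsigned. The dropped
// "= Attribute::None" initializers suggest a default-constructed Attributes
// already compares equal to Attribute::None; the builder below is hypothetical.
#include "llvm/Attributes.h"
using namespace llvm;

static Attributes makeNoUnwindReadNone() {
  Attributes A;               // assumed equivalent to Attribute::None
  A |= Attribute::NoUnwind;
  A |= Attribute::ReadNone;
  return A;
}
// [End of editorial sketch.]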
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 8f167725ed58..adf5d4f4d0f9 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -50,6 +50,7 @@ namespace lltok {
kw_tail,
kw_target,
kw_triple,
+ kw_unwind,
kw_deplibs,
kw_datalayout,
kw_volatile,
@@ -102,6 +103,7 @@ namespace lltok {
kw_noimplicitfloat,
kw_naked,
kw_nonlazybind,
+ kw_address_safety,
kw_type,
kw_opaque,
@@ -126,7 +128,7 @@ namespace lltok {
kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
- kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume,
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
kw_unreachable,
kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..3bc31ed910a7
--- /dev/null
+++ b/lib/AsmParser/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/AsmParser/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AsmParser
+parent = Libraries
+required_libraries = Core Support
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 59fb471f2b93..21b7fd411e3d 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -44,7 +44,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
OwningPtr<MemoryBuffer> File;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename,
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
return 0;
}
diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt
new file mode 100644
index 000000000000..af9936bbe829
--- /dev/null
+++ b/lib/Bitcode/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Bitcode/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Reader Writer
+
+[component_0]
+type = Group
+name = Bitcode
+parent = Libraries
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 46565f36af16..e3990403bd71 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -22,11 +22,19 @@
#include "llvm/AutoUpgrade.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataStream.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/OperandTraits.h"
using namespace llvm;
+void BitcodeReader::materializeForwardReferencedFunctions() {
+ while (!BlockAddrFwdRefs.empty()) {
+ Function *F = BlockAddrFwdRefs.begin()->first;
+ F->Materialize();
+ }
+}
+
void BitcodeReader::FreeState() {
if (BufferOwned)
delete Buffer;
@@ -394,7 +402,7 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
// The type table size is always specified correctly.
if (ID >= TypeList.size())
return 0;
-
+
if (Type *Ty = TypeList[ID])
return Ty;
@@ -403,14 +411,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
return TypeList[ID] = StructType::create(Context);
}
-/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
-Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
- if (ID >= TypeList.size())
- TypeList.resize(ID+1);
-
- return TypeList[ID];
-}
-
//===----------------------------------------------------------------------===//
// Functions for parsing blocks from the bitcode file
@@ -462,8 +462,8 @@ bool BitcodeReader::ParseAttributeBlock() {
// If Function attributes are using index 0 then transfer them
// to index ~0. Index 0 is used for return value attributes but used to be
// used for function attributes.
- Attributes RetAttribute = Attribute::None;
- Attributes FnAttribute = Attribute::None;
+ Attributes RetAttribute;
+ Attributes FnAttribute;
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
// FIXME: remove in LLVM 3.0
// The alignment is stored as a 16-bit raw value from bits 31--16.
@@ -473,23 +473,24 @@ bool BitcodeReader::ParseAttributeBlock() {
if (Alignment && !isPowerOf2_32(Alignment))
return Error("Alignment is not a power of two.");
- Attributes ReconstitutedAttr = Record[i+1] & 0xffff;
+ Attributes ReconstitutedAttr(Record[i+1] & 0xffff);
if (Alignment)
ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment);
- ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11;
- Record[i+1] = ReconstitutedAttr;
+ ReconstitutedAttr |=
+ Attributes((Record[i+1] & (0xffffull << 32)) >> 11);
+ Record[i+1] = ReconstitutedAttr.Raw();
if (Record[i] == 0)
- RetAttribute = Record[i+1];
+ RetAttribute = ReconstitutedAttr;
else if (Record[i] == ~0U)
- FnAttribute = Record[i+1];
+ FnAttribute = ReconstitutedAttr;
}
- unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
+ Attributes OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
Attribute::ReadOnly|Attribute::ReadNone);
if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
- (RetAttribute & OldRetAttrs) != 0) {
+ (RetAttribute & OldRetAttrs)) {
if (FnAttribute == Attribute::None) { // add a slot so they get added.
Record.push_back(~0U);
Record.push_back(0);
@@ -506,8 +507,9 @@ bool BitcodeReader::ParseAttributeBlock() {
} else if (Record[i] == ~0U) {
if (FnAttribute != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute));
- } else if (Record[i+1] != Attribute::None)
- Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1]));
+ } else if (Attributes(Record[i+1]) != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(Record[i],
+ Attributes(Record[i+1])));
}
MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end()));
@@ -521,7 +523,7 @@ bool BitcodeReader::ParseAttributeBlock() {
bool BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
return Error("Malformed block record");
-
+
return ParseTypeTableBody();
}
@@ -533,7 +535,7 @@ bool BitcodeReader::ParseTypeTableBody() {
unsigned NumRecords = 0;
SmallString<64> TypeName;
-
+
// Read all the records for this type table.
while (1) {
unsigned Code = Stream.ReadCode();
@@ -573,6 +575,9 @@ bool BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_VOID: // VOID
ResultTy = Type::getVoidTy(Context);
break;
+ case bitc::TYPE_CODE_HALF: // HALF
+ ResultTy = Type::getHalfTy(Context);
+ break;
case bitc::TYPE_CODE_FLOAT: // FLOAT
ResultTy = Type::getFloatTy(Context);
break;
@@ -615,12 +620,12 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
- case bitc::TYPE_CODE_FUNCTION: {
+ case bitc::TYPE_CODE_FUNCTION_OLD: {
// FIXME: attrid is dead, remove it in LLVM 3.0
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
ArgTys.push_back(T);
@@ -635,10 +640,29 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FUNCTION: [vararg, retty, paramty x N]
+ if (Record.size() < 2)
+ return Error("Invalid FUNCTION type record");
+ SmallVector<Type*, 8> ArgTys;
+ for (unsigned i = 2, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[1]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
return Error("Invalid STRUCT type record");
- std::vector<Type*> EltTys;
+ SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
EltTys.push_back(T);
@@ -728,247 +752,6 @@ bool BitcodeReader::ParseTypeTableBody() {
}
}
-// FIXME: Remove in LLVM 3.1
-bool BitcodeReader::ParseOldTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- if (!TypeList.empty())
- return Error("Multiple TYPE_BLOCKs found!");
-
-
- // While horrible, we have no good ordering of types in the bc file. Just
- // iteratively parse types out of the bc file in multiple passes until we get
- // them all. Do this by saving a cursor for the start of the type block.
- BitstreamCursor StartOfTypeBlockCursor(Stream);
-
- unsigned NumTypesRead = 0;
-
- SmallVector<uint64_t, 64> Record;
-RestartScan:
- unsigned NextTypeID = 0;
- bool ReadAnyTypes = false;
-
- // Read all the records for this type table.
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (NextTypeID != TypeList.size())
- return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
-
- // If we haven't read all of the types yet, iterate again.
- if (NumTypesRead != TypeList.size()) {
- // If we didn't successfully read any types in this pass, then we must
- // have an unhandled forward reference.
- if (!ReadAnyTypes)
- return Error("Obsolete bitcode contains unhandled recursive type");
-
- Stream = StartOfTypeBlockCursor;
- goto RestartScan;
- }
-
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- Type *ResultTy = 0;
- switch (Stream.ReadRecord(Code, Record)) {
- default: return Error("unknown type in type table");
- case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
- // TYPE_CODE_NUMENTRY contains a count of the number of types in the
- // type list. This allows us to reserve space.
- if (Record.size() < 1)
- return Error("Invalid TYPE_CODE_NUMENTRY record");
- TypeList.resize(Record[0]);
- continue;
- case bitc::TYPE_CODE_VOID: // VOID
- ResultTy = Type::getVoidTy(Context);
- break;
- case bitc::TYPE_CODE_FLOAT: // FLOAT
- ResultTy = Type::getFloatTy(Context);
- break;
- case bitc::TYPE_CODE_DOUBLE: // DOUBLE
- ResultTy = Type::getDoubleTy(Context);
- break;
- case bitc::TYPE_CODE_X86_FP80: // X86_FP80
- ResultTy = Type::getX86_FP80Ty(Context);
- break;
- case bitc::TYPE_CODE_FP128: // FP128
- ResultTy = Type::getFP128Ty(Context);
- break;
- case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
- ResultTy = Type::getPPC_FP128Ty(Context);
- break;
- case bitc::TYPE_CODE_LABEL: // LABEL
- ResultTy = Type::getLabelTy(Context);
- break;
- case bitc::TYPE_CODE_METADATA: // METADATA
- ResultTy = Type::getMetadataTy(Context);
- break;
- case bitc::TYPE_CODE_X86_MMX: // X86_MMX
- ResultTy = Type::getX86_MMXTy(Context);
- break;
- case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
- if (Record.size() < 1)
- return Error("Invalid Integer type record");
- ResultTy = IntegerType::get(Context, Record[0]);
- break;
- case bitc::TYPE_CODE_OPAQUE: // OPAQUE
- if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
- ResultTy = StructType::create(Context);
- break;
- case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
- if (NextTypeID >= TypeList.size()) break;
- // If we already read it, don't reprocess.
- if (TypeList[NextTypeID] &&
- !cast<StructType>(TypeList[NextTypeID])->isOpaque())
- break;
-
- // Set a type.
- if (TypeList[NextTypeID] == 0)
- TypeList[NextTypeID] = StructType::create(Context);
-
- std::vector<Type*> EltTys;
- for (unsigned i = 1, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- EltTys.push_back(Elt);
- else
- break;
- }
-
- if (EltTys.size() != Record.size()-1)
- break; // Not all elements are ready.
-
- cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
- ResultTy = TypeList[NextTypeID];
- TypeList[NextTypeID] = 0;
- break;
- }
- case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
- // [pointee type, address space]
- if (Record.size() < 1)
- return Error("Invalid POINTER type record");
- unsigned AddressSpace = 0;
- if (Record.size() == 2)
- AddressSpace = Record[1];
- if ((ResultTy = getTypeByIDOrNull(Record[0])))
- ResultTy = PointerType::get(ResultTy, AddressSpace);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION: {
- // FIXME: attrid is dead, remove it in LLVM 3.0
- // FUNCTION: [vararg, attrid, retty, paramty x N]
- if (Record.size() < 3)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 3, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+3 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[2])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid ARRAY type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = ArrayType::get(ResultTy, Record[0]);
- break;
- case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid VECTOR type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = VectorType::get(ResultTy, Record[0]);
- break;
- }
-
- if (NextTypeID >= TypeList.size())
- return Error("invalid TYPE table");
-
- if (ResultTy && TypeList[NextTypeID] == 0) {
- ++NumTypesRead;
- ReadAnyTypes = true;
-
- TypeList[NextTypeID] = ResultTy;
- }
-
- ++NextTypeID;
- }
-}
-
-
-bool BitcodeReader::ParseOldTypeSymbolTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- SmallVector<uint64_t, 64> Record;
-
- // Read all the records for this type table.
- std::string TypeName;
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type symbol table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
- default: // Default behavior: unknown type.
- break;
- case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
- if (ConvertToString(Record, 1, TypeName))
- return Error("Invalid TST_ENTRY record");
- unsigned TypeID = Record[0];
- if (TypeID >= TypeList.size())
- return Error("Invalid Type ID in TST_ENTRY record");
-
- // Only apply the type name to a struct type with no name.
- if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
- if (!STy->isLiteral() && !STy->hasName())
- STy->setName(TypeName);
- TypeName.clear();
- break;
- }
- }
-}
-
bool BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return Error("Malformed block record");
@@ -1262,7 +1045,9 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
return Error("Invalid FLOAT record");
- if (CurTy->isFloatTy())
+ if (CurTy->isHalfTy())
+ V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0])));
+ else if (CurTy->isFloatTy())
V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
else if (CurTy->isDoubleTy())
V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
@@ -1286,7 +1071,7 @@ bool BitcodeReader::ParseConstants() {
return Error("Invalid CST_AGGREGATE record");
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 16> Elts;
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
for (unsigned i = 0; i != Size; ++i)
@@ -1308,35 +1093,78 @@ bool BitcodeReader::ParseConstants() {
}
break;
}
- case bitc::CST_CODE_STRING: { // STRING: [values]
+ case bitc::CST_CODE_STRING: // STRING: [values]
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return Error("Invalid CST_AGGREGATE record");
-
- ArrayType *ATy = cast<ArrayType>(CurTy);
- Type *EltTy = ATy->getElementType();
+ return Error("Invalid CST_STRING record");
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
+ SmallString<16> Elts;
for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ConstantInt::get(EltTy, Record[i]));
- V = ConstantArray::get(ATy, Elts);
+ Elts.push_back(Record[i]);
+ V = ConstantDataArray::getString(Context, Elts,
+ BitCode == bitc::CST_CODE_CSTRING);
break;
}
- case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
- return Error("Invalid CST_AGGREGATE record");
-
- ArrayType *ATy = cast<ArrayType>(CurTy);
- Type *EltTy = ATy->getElementType();
-
+ return Error("Invalid CST_DATA record");
+
+ Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ConstantInt::get(EltTy, Record[i]));
- Elts.push_back(Constant::getNullValue(EltTy));
- V = ConstantArray::get(ATy, Elts);
+
+ if (EltTy->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0; i != Size; ++i) {
+ union { uint32_t I; float F; };
+ I = Record[i];
+ Elts.push_back(F);
+ }
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0; i != Size; ++i) {
+ union { uint64_t I; double F; };
+ I = Record[i];
+ Elts.push_back(F);
+ }
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else {
+ return Error("Unknown element type in CE_DATA");
+ }
break;
}
+
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_BINOP record");
int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
@@ -1517,6 +1345,50 @@ bool BitcodeReader::ParseConstants() {
return false;
}
+bool BitcodeReader::ParseUseLists() {
+ if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of use-list table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a use list record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ unsigned RecordLength = Record.size();
+ if (RecordLength < 1)
+ return Error ("Invalid UseList reader!");
+ UseListRecords.push_back(Record);
+ break;
+ }
+ }
+ }
+}
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -1538,8 +1410,36 @@ bool BitcodeReader::RememberAndSkipFunctionBody() {
return false;
}
-bool BitcodeReader::ParseModule() {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+bool BitcodeReader::GlobalCleanup() {
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+ // Force deallocation of memory for these vectors to favor clients that
+ // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ return false;
+}
+
+bool BitcodeReader::ParseModule(bool Resume) {
+ if (Resume)
+ Stream.JumpToBit(NextUnreadBit);
+ else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
@@ -1553,33 +1453,7 @@ bool BitcodeReader::ParseModule() {
if (Stream.ReadBlockEnd())
return Error("Error at end of module block");
- // Patch the initializers for globals and aliases up.
- ResolveGlobalAndAliasInits();
- if (!GlobalInits.empty() || !AliasInits.empty())
- return Error("Malformed global initializer set");
- if (!FunctionsWithBodies.empty())
- return Error("Too few function bodies found");
-
- // Look for intrinsic functions which need to be upgraded at some point
- for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- Function* NewFn;
- if (UpgradeIntrinsicFunction(FI, NewFn))
- UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
- }
-
- // Look for global variables which need to be renamed.
- for (Module::global_iterator
- GI = TheModule->global_begin(), GE = TheModule->global_end();
- GI != GE; ++GI)
- UpgradeGlobalVariable(GI);
-
- // Force deallocation of memory for these vectors to favor the client that
- // want lazy deserialization.
- std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
- std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
- std::vector<Function*>().swap(FunctionsWithBodies);
- return false;
+ return GlobalCleanup();
}
if (Code == bitc::ENTER_SUBBLOCK) {
@@ -1600,17 +1474,10 @@ bool BitcodeReader::ParseModule() {
if (ParseTypeTable())
return true;
break;
- case bitc::TYPE_BLOCK_ID_OLD:
- if (ParseOldTypeTable())
- return true;
- break;
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- if (ParseOldTypeSymbolTable())
- return true;
- break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (ParseValueSymbolTable())
return true;
+ SeenValueSymbolTable = true;
break;
case bitc::CONSTANTS_BLOCK_ID:
if (ParseConstants() || ResolveGlobalAndAliasInits())
@@ -1623,13 +1490,29 @@ bool BitcodeReader::ParseModule() {
case bitc::FUNCTION_BLOCK_ID:
// If this is the first function body we've seen, reverse the
// FunctionsWithBodies list.
- if (!HasReversedFunctionsWithBodies) {
+ if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- HasReversedFunctionsWithBodies = true;
+ if (GlobalCleanup())
+ return true;
+ SeenFirstFunctionBody = true;
}
if (RememberAndSkipFunctionBody())
return true;
+ // For streaming bitcode, suspend parsing when we reach the function
+ // bodies. Subsequent materialization calls will resume it when
+ // necessary. For streaming, the function bodies must be at the end of
+ // the bitcode. If the bitcode file is old, the symbol table will be
+ // at the end instead and will not have been seen yet. In this case,
+ // just finish the parse now.
+ if (LazyStreamer && SeenValueSymbolTable) {
+ NextUnreadBit = Stream.GetCurrentBitNo();
+ return false;
+ }
+ break;
+ case bitc::USELIST_BLOCK_ID:
+ if (ParseUseLists())
+ return true;
break;
}
continue;
@@ -1784,8 +1667,10 @@ bool BitcodeReader::ParseModule() {
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
- if (!isProto)
+ if (!isProto) {
FunctionsWithBodies.push_back(Func);
+ if (LazyStreamer) DeferredFunctionInfo[Func] = 0;
+ }
break;
}
// ALIAS: [alias type, aliasee val#, linkage]
@@ -1824,24 +1709,7 @@ bool BitcodeReader::ParseModule() {
bool BitcodeReader::ParseBitcodeInto(Module *M) {
TheModule = 0;
- unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
- unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
- if (Buffer->getBufferSize() & 3) {
- if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
- return Error("Invalid bitcode signature");
- else
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
- }
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
- return Error("Invalid bitcode wrapper header");
-
- StreamFile.init(BufPtr, BufEnd);
- Stream.init(StreamFile);
+ if (InitStream()) return true;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -1883,8 +1751,9 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
if (TheModule)
return Error("Multiple MODULE_BLOCKs in same stream");
TheModule = M;
- if (ParseModule())
+ if (ParseModule(false))
return true;
+ if (LazyStreamer) return false;
break;
default:
if (Stream.SkipBlock())
@@ -1952,20 +1821,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
}
bool BitcodeReader::ParseTriple(std::string &Triple) {
- if (Buffer->getBufferSize() & 3)
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
- unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
- unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
- return Error("Invalid bitcode wrapper header");
-
- StreamFile.init(BufPtr, BufEnd);
- Stream.init(StreamFile);
+ if (InitStream()) return true;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -2517,10 +2373,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
- I = new UnwindInst(Context);
- InstructionList.push_back(I);
- break;
case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
I = new UnreachableInst(Context);
InstructionList.push_back(I);
@@ -2845,6 +2697,19 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return false;
}
+/// FindFunctionInStream - Find the function body in the bitcode stream
+bool BitcodeReader::FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+ while (DeferredFunctionInfoIterator->second == 0) {
+ if (Stream.AtEndOfStream())
+ return Error("Could not find Function in stream");
+ // ParseModule will parse the next body in the stream and set its
+ // position in the DeferredFunctionInfo map.
+ if (ParseModule(true)) return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//
@@ -2865,6 +2730,10 @@ bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+ // If its position is recorded as 0, its body is somewhere in the stream
+ // but we haven't seen it yet.
+ if (DFII->second == 0)
+ if (LazyStreamer && FindFunctionInStream(F, DFII)) return true;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
@@ -2920,6 +2789,12 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
Materialize(F, ErrInfo))
return true;
+ // At this point, if there are any function bodies, the current bit is
+ // pointing to the END_BLOCK record after them. Now make sure the rest
+ // of the bits in the module have been read.
+ if (NextUnreadBit)
+ ParseModule(true);
+
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
// module is materialized because there could always be another function body
@@ -2939,15 +2814,60 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
+ return false;
+}
+
+bool BitcodeReader::InitStream() {
+ if (LazyStreamer) return InitLazyStream();
+ return InitStreamFromBuffer();
+}
+
+bool BitcodeReader::InitStreamFromBuffer() {
+ const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
+ return Error("Invalid bitcode wrapper header");
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(TheModule);
+ StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
+ Stream.init(*StreamFile);
return false;
}
+bool BitcodeReader::InitLazyStream() {
+ // Check and strip off the bitcode wrapper; BitstreamReader expects never to
+ // see it.
+ StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
+ StreamFile.reset(new BitstreamReader(Bytes));
+ Stream.init(*StreamFile);
+
+ unsigned char buf[16];
+ if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+ return Error("Bitcode stream must be at least 16 bytes in length");
+
+ if (!isBitcode(buf, buf + 16))
+ return Error("Invalid bitcode signature");
+
+ if (isBitcodeWrapper(buf, buf + 4)) {
+ const unsigned char *bitcodeStart = buf;
+ const unsigned char *bitcodeEnd = buf + 16;
+ SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
+ Bytes->dropLeadingBytes(bitcodeStart - buf);
+ Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
+ }
+ return false;
+}
//===----------------------------------------------------------------------===//
// External interface
@@ -2970,6 +2890,27 @@ Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
}
// Have the BitcodeReader dtor delete 'Buffer'.
R->setBufferOwned(true);
+
+ R->materializeForwardReferencedFunctions();
+
+ return M;
+}
+
+
+Module *llvm::getStreamedBitcodeModule(const std::string &name,
+ DataStreamer *streamer,
+ LLVMContext &Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(name, Context);
+ BitcodeReader *R = new BitcodeReader(streamer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+ delete M; // Also deletes R.
+ return 0;
+ }
+ R->setBufferOwned(false); // no buffer to delete
return M;
}
@@ -2990,6 +2931,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
return 0;
}
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+
return M;
}
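
A hypothetical caller sketch (not from this patch) driving the new streaming entry point; "DS" stands for any DataStreamer implementation, and the declarations are assumed to live in llvm/Bitcode/ReaderWriter.h at this revision. Parsing suspends once the function blocks are reached; bodies are pulled in later through the GVMaterializer installed on the module.

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/DataStream.h"
#include <string>
using namespace llvm;

// Build a Module from a streamed bitcode source, then force full
// materialization so the caller gets an ordinary, fully-read module.
static Module *loadStreamedModule(DataStreamer *DS, LLVMContext &Ctx,
                                  std::string &ErrMsg) {
  Module *M = getStreamedBitcodeModule("streamed-module", DS, Ctx, &ErrMsg);
  if (!M)
    return 0;                       // ErrMsg carries the reader's error string.
  if (M->MaterializeAll(&ErrMsg)) { // reads the remaining function bodies
    delete M;                       // also deletes the BitcodeReader materializer
    return 0;
  }
  return M;
}
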
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 6e6118cac0dc..e7c4e94f785f 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -126,8 +126,11 @@ class BitcodeReader : public GVMaterializer {
Module *TheModule;
MemoryBuffer *Buffer;
bool BufferOwned;
- BitstreamReader StreamFile;
+ OwningPtr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
+ DataStreamer *LazyStreamer;
+ uint64_t NextUnreadBit;
+ bool SeenValueSymbolTable;
const char *ErrorString;
@@ -135,6 +138,7 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
SmallVector<Instruction *, 64> InstructionList;
+ SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -160,9 +164,10 @@ class BitcodeReader : public GVMaterializer {
// Map the bitcode's custom MDKind ID to the Module's MDKind ID.
DenseMap<unsigned, unsigned> MDKindMap;
- // After the module header has been read, the FunctionsWithBodies list is
- // reversed. This keeps track of whether we've done this yet.
- bool HasReversedFunctionsWithBodies;
+ // Several operations happen after the module header has been read, but
+ // before function bodies are processed. This keeps track of whether
+ // we've done this yet.
+ bool SeenFirstFunctionBody;
/// DeferredFunctionInfo - When function bodies are initially scanned, this
/// map contains info about where to find deferred function body in the
@@ -177,13 +182,22 @@ class BitcodeReader : public GVMaterializer {
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- ErrorString(0), ValueList(C), MDValueList(C) {
- HasReversedFunctionsWithBodies = false;
+ LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false) {
+ }
+ explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+ LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false) {
}
~BitcodeReader() {
FreeState();
}
-
+
+ void materializeForwardReferencedFunctions();
+
void FreeState();
/// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
@@ -211,7 +225,6 @@ public:
bool ParseTriple(std::string &Triple);
private:
Type *getTypeByID(unsigned ID);
- Type *getTypeByIDOrNull(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
@@ -256,21 +269,26 @@ private:
}
- bool ParseModule();
+ bool ParseModule(bool Resume);
bool ParseAttributeBlock();
bool ParseTypeTable();
- bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
bool ParseTypeTableBody();
- bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
bool ParseValueSymbolTable();
bool ParseConstants();
bool RememberAndSkipFunctionBody();
bool ParseFunctionBody(Function *F);
+ bool GlobalCleanup();
bool ResolveGlobalAndAliasInits();
bool ParseMetadata();
bool ParseMetadataAttachment();
bool ParseModuleTriple(std::string &Triple);
+ bool ParseUseLists();
+ bool InitStream();
+ bool InitStreamFromBuffer();
+ bool InitLazyStream();
+ bool FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
};
} // End llvm namespace
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index 37bebc449635..693d4310b834 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
)
-
-add_llvm_library_dependencies(LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt
new file mode 100644
index 000000000000..c85a87bfebc8
--- /dev/null
+++ b/lib/Bitcode/Reader/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Bitcode/Reader/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = BitReader
+parent = Bitcode
+required_libraries = Core Support
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5b3d96953a0e..b25d2e96d594 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,6 +32,12 @@
#include <map>
using namespace llvm;
+static cl::opt<bool>
+EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
+ cl::desc("Turn on experimental support for "
+ "use-list order preservation."),
+ cl::init(false), cl::Hidden);
+
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -119,7 +126,6 @@ static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
switch (Ordering) {
- default: llvm_unreachable("Unknown atomic ordering");
case NotAtomic: return bitc::ORDERING_NOTATOMIC;
case Unordered: return bitc::ORDERING_UNORDERED;
case Monotonic: return bitc::ORDERING_MONOTONIC;
@@ -128,14 +134,15 @@ static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
case AcquireRelease: return bitc::ORDERING_ACQREL;
case SequentiallyConsistent: return bitc::ORDERING_SEQCST;
}
+ llvm_unreachable("Invalid ordering");
}
static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) {
switch (SynchScope) {
- default: llvm_unreachable("Unknown synchronization scope");
case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD;
case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD;
}
+ llvm_unreachable("Invalid synch scope");
}
static void WriteStringRecord(unsigned Code, StringRef Str,
@@ -172,10 +179,11 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
// Store the alignment in the bitcode as a 16-bit raw value instead of a
// 5-bit log2 encoded value. Shift the bits above the alignment up by
// 11 bits.
- uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+ uint64_t FauxAttr = PAWI.Attrs.Raw() & 0xffff;
if (PAWI.Attrs & Attribute::Alignment)
- FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
- FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+ FauxAttr |= (1ull<<16)<<
+ (((PAWI.Attrs & Attribute::Alignment).Raw()-1) >> 16);
+ FauxAttr |= (PAWI.Attrs.Raw() & (0x3FFull << 21)) << 11;
Record.push_back(FauxAttr);
}
@@ -194,11 +202,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -206,10 +215,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
- Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_ANON.
@@ -217,8 +225,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_NAME.
@@ -233,16 +241,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
// Emit an entry count so the reader can reserve space.
@@ -259,6 +267,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
switch (T->getTypeID()) {
default: llvm_unreachable("Unknown type!");
case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
@@ -284,10 +293,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
}
case Type::FunctionTyID: {
FunctionType *FT = cast<FunctionType>(T);
- // FUNCTION: [isvararg, attrid, retty, paramty x N]
+ // FUNCTION: [isvararg, retty, paramty x N]
Code = bitc::TYPE_CODE_FUNCTION;
TypeVals.push_back(FT->isVarArg());
- TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0
TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
@@ -350,7 +358,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
static unsigned getEncodedLinkage(const GlobalValue *GV) {
switch (GV->getLinkage()) {
- default: llvm_unreachable("Invalid linkage!");
case GlobalValue::ExternalLinkage: return 0;
case GlobalValue::WeakAnyLinkage: return 1;
case GlobalValue::AppendingLinkage: return 2;
@@ -368,15 +375,16 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
case GlobalValue::LinkerPrivateWeakLinkage: return 14;
case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
}
+ llvm_unreachable("Invalid linkage");
}
static unsigned getEncodedVisibility(const GlobalValue *GV) {
switch (GV->getVisibility()) {
- default: llvm_unreachable("Invalid visibility!");
case GlobalValue::DefaultVisibility: return 0;
case GlobalValue::HiddenVisibility: return 1;
case GlobalValue::ProtectedVisibility: return 2;
}
+ llvm_unreachable("Invalid visibility");
}
// Emit top-level description of module, including target triple, inline asm,
@@ -499,8 +507,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
- // FUNCTION: [type, callingconv, isproto, paramattr,
- // linkage, alignment, section, visibility, gc, unnamed_addr]
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -520,6 +528,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the alias information.
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
AI != E; ++AI) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
Vals.push_back(VE.getTypeID(AI->getType()));
Vals.push_back(VE.getValueID(AI->getAliasee()));
Vals.push_back(getEncodedLinkage(AI));
@@ -819,7 +828,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
Code = bitc::CST_CODE_FLOAT;
Type *Ty = CFP->getType();
- if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) {
Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (Ty->isX86_FP80Ty()) {
// api needed to prevent premature destruction
@@ -836,34 +845,56 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else {
assert (0 && "Unknown FP type!");
}
- } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
- const ConstantArray *CA = cast<ConstantArray>(C);
+ } else if (isa<ConstantDataSequential>(C) &&
+ cast<ConstantDataSequential>(C)->isString()) {
+ const ConstantDataSequential *Str = cast<ConstantDataSequential>(C);
// Emit constant strings specially.
- unsigned NumOps = CA->getNumOperands();
+ unsigned NumElts = Str->getNumElements();
// If this is a null-terminated string, use the denser CSTRING encoding.
- if (CA->getOperand(NumOps-1)->isNullValue()) {
+ if (Str->isCString()) {
Code = bitc::CST_CODE_CSTRING;
- --NumOps; // Don't encode the null, which isn't allowed by char6.
+ --NumElts; // Don't encode the null, which isn't allowed by char6.
} else {
Code = bitc::CST_CODE_STRING;
AbbrevToUse = String8Abbrev;
}
bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
- for (unsigned i = 0; i != NumOps; ++i) {
- unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned char V = Str->getElementAsInteger(i);
Record.push_back(V);
isCStr7 &= (V & 128) == 0;
if (isCStrChar6)
isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
}
-
+
if (isCStrChar6)
AbbrevToUse = CString6Abbrev;
else if (isCStr7)
AbbrevToUse = CString7Abbrev;
- } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
- isa<ConstantVector>(V)) {
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ Code = bitc::CST_CODE_DATA;
+ Type *EltTy = CDS->getType()->getElementType();
+ if (isa<IntegerType>(EltTy)) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(CDS->getElementAsInteger(i));
+ } else if (EltTy->isFloatTy()) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { float F; uint32_t I; };
+ F = CDS->getElementAsFloat(i);
+ Record.push_back(I);
+ }
+ } else {
+ assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { double F; uint64_t I; };
+ F = CDS->getElementAsDouble(i);
+ Record.push_back(I);
+ }
+ }
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
Code = bitc::CST_CODE_AGGREGATE;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(C->getOperand(i)));
@@ -1105,10 +1136,18 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
break;
case Instruction::Switch:
- Code = bitc::FUNC_CODE_INST_SWITCH;
- Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i)));
+ {
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ SwitchInst &SI = cast<SwitchInst>(I);
+ Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
+ Vals.push_back(VE.getValueID(SI.getCondition()));
+ Vals.push_back(VE.getValueID(SI.getDefaultDest()));
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ Vals.push_back(VE.getValueID(i.getCaseValue()));
+ Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
+ }
+ }
break;
case Instruction::IndirectBr:
Code = bitc::FUNC_CODE_INST_INDIRECTBR;
@@ -1146,9 +1185,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_RESUME;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
- case Instruction::Unwind:
- Code = bitc::FUNC_CODE_INST_UNWIND;
- break;
case Instruction::Unreachable:
Code = bitc::FUNC_CODE_INST_UNREACHABLE;
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
@@ -1573,6 +1609,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+// Sort the Users based on the order in which the reader parses the bitcode
+// file.
+static bool bitcodereader_order(const User *lhs, const User *rhs) {
+ // TODO: Implement.
+ return true;
+}
+
+static void WriteUseList(const Value *V, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ // One or zero uses can't get out of order.
+ if (V->use_empty() || V->hasNUses(1))
+ return;
+
+ // Make a copy of the in-memory use-list for sorting.
+ unsigned UseListSize = std::distance(V->use_begin(), V->use_end());
+ SmallVector<const User*, 8> UseList;
+ UseList.reserve(UseListSize);
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ UseList.push_back(U);
+ }
+
+ // Sort the copy based on the order read by the BitcodeReader.
+ std::sort(UseList.begin(), UseList.end(), bitcodereader_order);
+
+ // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
+ // sorted list (i.e., the expected BitcodeReader in-memory use-list).
+
+ // TODO: Emit the USELIST_CODE_ENTRYs.
+}
+
+static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ VE.incorporateFunction(*F);
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ WriteUseList(AI, VE, Stream);
+ for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
+ ++BB) {
+ WriteUseList(BB, VE, Stream);
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ WriteUseList(II, VE, Stream);
+ for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ WriteUseList(*OI, VE, Stream);
+ }
+ }
+ }
+ VE.purgeFunction();
+}
+
+// Emit use-lists.
+static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+
+ // XXX: this modifies the module, but in a way that should never change the
+ // behavior of any pass or codegen in LLVM. The problem is that GVs may
+ // contain entries in the use_list that do not exist in the Module and are
+ // not stored in the .bc file.
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ I->removeDeadConstantUsers();
+
+ // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(),
+ GE = M->global_end(); GI != GE; ++GI) {
+ WriteUseList(GI, VE, Stream);
+
+ // Write the global variable initializers.
+ if (GI->hasInitializer())
+ WriteUseList(GI->getInitializer(), VE, Stream);
+ }
+
+ // Write the functions.
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ WriteUseList(FI, VE, Stream);
+ if (!FI->isDeclaration())
+ WriteFunctionUseList(FI, VE, Stream);
+ }
+
+ // Write the aliases.
+ for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ WriteUseList(AI, VE, Stream);
+ WriteUseList(AI->getAliasee(), VE, Stream);
+ }
+
+ Stream.ExitBlock();
+}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
@@ -1607,17 +1739,21 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit metadata.
WriteModuleMetadata(M, VE, Stream);
- // Emit function bodies.
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
- if (!F->isDeclaration())
- WriteFunction(*F, VE, Stream);
-
// Emit metadata.
WriteModuleMetadataStore(M, Stream);
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+ // Emit use-lists.
+ if (EnablePreserveUseListOrdering)
+ WriteModuleUseLists(M, VE, Stream);
+
+ // Emit function bodies.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+ if (!F->isDeclaration())
+ WriteFunction(*F, VE, Stream);
+
Stream.ExitBlock();
}
@@ -1639,7 +1775,17 @@ enum {
DarwinBCHeaderSize = 5*4
};
-static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
+static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl<char> &Buffer,
+ uint32_t &Position) {
+ Buffer[Position + 0] = (unsigned char) (Value >> 0);
+ Buffer[Position + 1] = (unsigned char) (Value >> 8);
+ Buffer[Position + 2] = (unsigned char) (Value >> 16);
+ Buffer[Position + 3] = (unsigned char) (Value >> 24);
+ Position += 4;
+}
+
+static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
+ const Triple &TT) {
unsigned CPUType = ~0U;
// Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
@@ -1666,63 +1812,55 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
CPUType = DARWIN_CPU_TYPE_ARM;
// Traditional Bitcode starts after header.
+ assert(Buffer.size() >= DarwinBCHeaderSize &&
+ "Expected header size to be reserved");
unsigned BCOffset = DarwinBCHeaderSize;
+ unsigned BCSize = Buffer.size()-DarwinBCHeaderSize;
- Stream.Emit(0x0B17C0DE, 32);
- Stream.Emit(0 , 32); // Version.
- Stream.Emit(BCOffset , 32);
- Stream.Emit(0 , 32); // Filled in later.
- Stream.Emit(CPUType , 32);
-}
-
-/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
-/// finalize the header.
-static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
- // Update the size field in the header.
- Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+ // Write the magic and version.
+ unsigned Position = 0;
+ WriteInt32ToBuffer(0x0B17C0DE , Buffer, Position);
+ WriteInt32ToBuffer(0 , Buffer, Position); // Version.
+ WriteInt32ToBuffer(BCOffset , Buffer, Position);
+ WriteInt32ToBuffer(BCSize , Buffer, Position);
+ WriteInt32ToBuffer(CPUType , Buffer, Position);
// If the file is not a multiple of 16 bytes, insert dummy padding.
- while (BufferSize & 15) {
- Stream.Emit(0, 8);
- ++BufferSize;
- }
+ while (Buffer.size() & 15)
+ Buffer.push_back(0);
}
-
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
- std::vector<unsigned char> Buffer;
- BitstreamWriter Stream(Buffer);
-
+ SmallVector<char, 1024> Buffer;
Buffer.reserve(256*1024);
- WriteBitcodeToStream( M, Stream );
-
- // Write the generated bitstream to "Out".
- Out.write((char*)&Buffer.front(), Buffer.size());
-}
-
-/// WriteBitcodeToStream - Write the specified module to the specified output
-/// stream.
-void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
- // If this is darwin or another generic macho target, emit a file header and
- // trailer if needed.
+ // If this is darwin or another generic macho target, reserve space for the
+ // header.
Triple TT(M->getTargetTriple());
if (TT.isOSDarwin())
- EmitDarwinBCHeader(Stream, TT);
-
- // Emit the file header.
- Stream.Emit((unsigned)'B', 8);
- Stream.Emit((unsigned)'C', 8);
- Stream.Emit(0x0, 4);
- Stream.Emit(0xC, 4);
- Stream.Emit(0xE, 4);
- Stream.Emit(0xD, 4);
-
- // Emit the module.
- WriteModule(M, Stream);
+ Buffer.insert(Buffer.begin(), DarwinBCHeaderSize, 0);
+
+ // Emit the module into the buffer.
+ {
+ BitstreamWriter Stream(Buffer);
+
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+ }
if (TT.isOSDarwin())
- EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+ EmitDarwinBCHeaderAndTrailer(Buffer, TT);
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
}
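
A small sketch (not part of the patch) of the 20-byte wrapper header that EmitDarwinBCHeaderAndTrailer now writes directly into the output buffer; field order, widths, and the little-endian byte order follow the code above, while the helper names are illustrative.

#include <cstdint>
#include <vector>

// Append a 32-bit value in little-endian byte order, mirroring WriteInt32ToBuffer.
static void pushLE32(std::vector<unsigned char> &Buf, uint32_t V) {
  Buf.push_back((unsigned char)(V >>  0));
  Buf.push_back((unsigned char)(V >>  8));
  Buf.push_back((unsigned char)(V >> 16));
  Buf.push_back((unsigned char)(V >> 24));
}

// Build the wrapper header for a bitcode payload of BCSize bytes that starts
// immediately after the header (DarwinBCHeaderSize == 5 * 4 == 20).
static std::vector<unsigned char> darwinWrapperHeader(uint32_t BCSize,
                                                      uint32_t CPUType) {
  std::vector<unsigned char> Hdr;
  pushLE32(Hdr, 0x0B17C0DE); // magic
  pushLE32(Hdr, 0);          // version
  pushLE32(Hdr, 20);         // offset of the bitcode within the file
  pushLE32(Hdr, BCSize);     // size of the bitcode
  pushLE32(Hdr, CPUType);    // Mach-O CPU type (~0U if the triple is unknown)
  return Hdr;
}
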
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
index 3cf905697a42..f097b097c337 100644
--- a/lib/Bitcode/Writer/CMakeLists.txt
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMBitWriter
BitcodeWriterPass.cpp
ValueEnumerator.cpp
)
-
-add_llvm_library_dependencies(LLVMBitWriter
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
new file mode 100644
index 000000000000..7d9e1de771b9
--- /dev/null
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Bitcode/Writer/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = BitWriter
+parent = Bitcode
+required_libraries = Core Support
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9ae9905b9f1d..1ed9004eb5a1 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -19,6 +19,8 @@
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
OptimizeConstants(FirstConstant, Values.size());
}
-
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
+void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MDValueMap, "MetaData");
+ dbgs() << '\n';
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (ValueMapType::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+
+ const Value *V = I->first;
+ if (V->hasName())
+ OS << "Value: " << V->getName();
+ else
+ OS << "Value: [null]\n";
+ V->dump();
+
+ OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ if (UI != V->use_begin())
+ OS << ",";
+ if((*UI)->hasName())
+ OS << " " << (*UI)->getName();
+ else
+ OS << " [null]";
+
+ }
+ OS << "\n\n";
+ }
+}
+
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
@@ -283,10 +321,6 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
if (isa<GlobalValue>(C)) {
// Initializers for globals are handled explicitly elsewhere.
- } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
- // Do not enumerate the initializers for an array of simple characters.
- // The initializers just pollute the value table, and we emit the strings
- // specially.
} else if (C->getNumOperands()) {
// If a constant has operands, enumerate them. This makes sure that if a
// constant has uses (for example an array of const ints), that they are
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b6fc920e412b..a6ca53606248 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -32,6 +32,7 @@ class NamedMDNode;
class AttrListPtr;
class ValueSymbolTable;
class MDSymbolTable;
+class raw_ostream;
class ValueEnumerator {
public:
@@ -83,6 +84,9 @@ private:
public:
ValueEnumerator(const Module *M);
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+
unsigned getValueID(const Value *V) const;
unsigned getTypeID(Type *T) const {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25842a7876a2..822a564441ac 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
@@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
PassthruRegs.insert(*Subreg);
}
@@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
if (!State->IsLive(SubregReg)) {
@@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
@@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Any aliased that are live at this point are completely or
// partially defined here, so group those aliases with Reg.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
@@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
}
@@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register uses for this instruction and update
@@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
} else {
bool found = false;
- for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
*Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg) ||
@@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
I != E; --Count) {
MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
DEBUG(dbgs() << "Anti: ");
DEBUG(MI->dump());
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 1005f102bea6..87f64311a655 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
if (HintPair.first) {
const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
// The remaining allocation order may depend on the hint.
- ArrayRef<unsigned> Order =
+ ArrayRef<uint16_t> Order =
TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
VRM.getMachineFunction());
if (Order.empty())
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index d1e48a1f2e96..0ce7e0c3b5f6 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -34,8 +34,7 @@ public:
/// AllocationOrder - Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
- /// @param ReservedRegs Set of reserved registers as returned by
- /// TargetRegisterInfo::getReservedRegs().
+ /// @param RegClassInfo Information about reserved and allocatable registers.
AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index fafc01044d4f..00874d411378 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,4 +1,4 @@
-//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
}
- if (NoNaNsFPMath)
- return FOC;
- else
- return FPC;
}
/// getICmpCondCode - Return the ISD condition code corresponding to
@@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
case ICmpInst::ICMP_UGT: return ISD::SETUGT;
default:
llvm_unreachable("Invalid ICmp predicate opcode!");
- return ISD::SETNE;
}
}
@@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term))) return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(I))
for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
--BBI) {
if (&*BBI == I)
@@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(BBI))
return false;
}
@@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
@@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
}
bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- const TargetLowering &TLI) {
+ SDValue &Chain, const TargetLowering &TLI) {
const Function *F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if (CallerRetAttr & ~Attribute::NoAlias)
return false;
@@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return false;
// Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node);
+ return TLI.isUsedByReturnOnly(Node, Chain);
}
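
The rewrite above splits what used to be one table lookup keyed on the global NoNaNsFPMath flag into two steps: getFCmpCondCode now always returns the ordered/unordered-preserving code, and callers that know NaNs cannot occur strip that distinction with the new getFCmpCodeWithoutNaN. A hedged sketch of the resulting call-site pattern follows; the wrapper function and the HasNoNaNs parameter are placeholders for whatever flag a real caller consults (e.g. TargetOptions::NoNaNsFPMath).

#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Illustrative caller-side pattern after this change.
static ISD::CondCode getCondCodeFor(FCmpInst::Predicate Pred, bool HasNoNaNs) {
  ISD::CondCode CC = getFCmpCondCode(Pred);   // e.g. FCMP_OLT -> SETOLT
  if (HasNoNaNs)
    CC = getFCmpCodeWithoutNaN(CC);           // SETOLT -> SETLT
  return CC;
}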
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 3f2387325360..b60fda86a6ba 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -29,6 +29,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
@@ -36,6 +37,12 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
+cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+ cl::init(false));
+
+
ARMException::ARMException(AsmPrinter *A)
: DwarfException(A),
shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
@@ -72,13 +79,15 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitPersonality(PerSym);
}
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ if (EnableARMEHABIDescriptors) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ Asm->OutStreamer.EmitHandlerData();
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
Asm->OutStreamer.EmitFnEnd();
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1999f3608788..b0b2ff4882af 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
OutStreamer(Streamer),
LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
DD = 0; DE = 0; MMI = 0; LI = 0;
+ CurrentFnSym = CurrentFnSymForSize = 0;
GCMetadataPrinters = 0;
VerboseAsm = Streamer.isVerboseAsm();
}
@@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() {
MF->getFunction()->needsUnwindTableEntry();
}
+bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
+ return MAI->doesDwarfUseRelocationsForStringPool();
+}
+
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
MCSymbol *Label = MI.getOperand(0).getMCSymbol();
@@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
+ const Function *F = MF->getFunction();
+ for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
+ const BasicBlock *BB = i;
+ if (!BB->hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer.AddComment("Address of block that was removed by CodeGen");
+ OutStreamer.EmitLabel(Sym);
+ }
+
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
@@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() {
const MCExpr *SizeExp =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
- MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
@@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
- for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
*SR && Reg < 0; ++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register uses of the superregister
@@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+
// Finalize debug and EH information.
if (DE) {
{
@@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
- llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
OutContext);
@@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
return;
}
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
case MachineJumpTableInfo::EK_LabelDifference32: {
// EK_LabelDifference32 - Each entry is the address of the block minus
// the address of the jump table. This is used for PIC jump tables where
@@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
- const TargetData *TD = TM.getTargetData();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
if (GV->getName() == "llvm.global_ctors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
if (GV->getName() == "llvm.global_dtors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
}
}
-typedef std::pair<int, Constant*> Structor;
+typedef std::pair<unsigned, Constant*> Structor;
static bool priority_order(const Structor& lhs, const Structor& rhs) {
return lhs.first < rhs.first;
@@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) {
CS->getOperand(1)));
}
- // Emit the function pointers in reverse priority order.
- switch (MAI->getStructorOutputOrder()) {
- case Structors::None:
- break;
- case Structors::PriorityOrder:
- std::sort(Structors.begin(), Structors.end(), priority_order);
- break;
- case Structors::ReversePriorityOrder:
- std::sort(Structors.rbegin(), Structors.rend(), priority_order);
- break;
+ // Emit the function pointers in the target-specific order
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ const MCSection *OutputSection =
+ (isCtor ?
+ getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+ getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ OutStreamer.SwitchSection(OutputSection);
+ if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(Structors[i].second);
}
- for (unsigned i = 0, e = Structors.size(); i != e; ++i)
- EmitGlobalConstant(Structors[i].second);
}
//===--------------------------------------------------------------------===//
@@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (CE == 0) {
llvm_unreachable("Unknown constant value to lower!");
- return MCConstantExpr::Create(0, Ctx);
}
switch (CE->getOpcode()) {
@@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
!AP.MF ? 0 : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
- return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1543,6 +1570,19 @@ static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
// Make sure all array elements are sequences of the same repeated
// byte.
- if (CA->getNumOperands() == 0) return -1;
-
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
if (Byte == -1) return -1;
@@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
return -1;
}
-static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
- AsmPrinter &AP) {
- if (AddrSpace != 0 || !CA->isString()) {
- // Not a string. Print the values in successive locations.
-
- // See if we can aggregate some values. Make sure it can be
- // represented as a series of bytes of the constant value.
- int Value = isRepeatedByteSequence(CA, AP.TM);
-
- if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
- AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+ unsigned AddrSpace,AsmPrinter &AP){
+
+ // See if we can aggregate this into a .fill, if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, AP.TM);
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize, AddrSpace);
+ }
+ } else if (ElementByteSize == 4) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ assert(CDS->getElementType()->isFloatTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+
+ F = CDS->getElementAsFloat(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
}
- else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ } else {
+ assert(CDS->getElementType()->isDoubleTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+
+ F = CDS->getElementAsDouble(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
}
- return;
}
- // Otherwise, it can be emitted as .ascii.
- SmallVector<char, 128> TmpVec;
- TmpVec.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
- AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
}
static void EmitGlobalConstantVector(const ConstantVector *CV,
@@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- if (CFP->getType()->isDoubleTy()) {
+ if (CFP->getType()->isHalfTy()) {
if (AP.isVerbose()) {
- double Val = CFP->getValueAPF().convertToDouble();
- AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ SmallString<10> Str;
+ CFP->getValueAPF().toString(Str);
+ AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
}
-
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
return;
}
if (CFP->getType()->isFloatTy()) {
if (AP.isVerbose()) {
float Val = CFP->getValueAPF().convertToFloat();
- AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
+ << " (" << format("0x%x", IntVal) << ")\n";
}
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
return;
}
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
+ << " (" << format("0x%lx", IntVal) << ")\n";
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
if (CFP->getType()->isX86_FP80Ty()) {
// all long double variants are printed as hex
// API needed to prevent premature destruction
@@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
- uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ const TargetData *TD = AP.TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
- }
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
switch (Size) {
case 1:
case 2:
case 4:
case 8:
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
@@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
}
}
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return EmitGlobalConstantArray(CVA, AddrSpace, AP);
-
- if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
-
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
return EmitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
return;
}
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return EmitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
- AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
- AddrSpace);
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
@@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
- EmitAlignment(Log2_32(Align));
+ EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
OutStreamer.EmitLabel(Syms[i]);
}
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
// Print the main label for the block.
if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
-
- EmitBasicBlockLoopComments(*MBB, LI, *this);
-
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
Twine(MBB->getNumber()) + ":");
}
} else {
- if (isVerbose()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
- EmitBasicBlockLoopComments(*MBB, LI, *this);
- }
-
OutStreamer.EmitLabel(MBB->getSymbol());
}
}
@@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not
@@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
- return 0;
}
-
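
Several of the AsmPrinter changes above hinge on the same small test: a ConstantDataSequential whose raw bytes are all identical can be emitted as a single .fill directive instead of element by element. A standalone sketch of that check on a plain byte buffer is shown below; the names are illustrative, while the real code operates on the constant's raw data values as in the isRepeatedByteSequence overload added above.

#include <cstdint>
#include <string>

// Returns the repeated byte value (0..255) if every byte in Data is the
// same, or -1 otherwise.
static int repeatedByteValue(const std::string &Data) {
  if (Data.empty())
    return -1;                      // Empty aggregates are handled elsewhere.
  char C = Data[0];
  for (size_t i = 1, e = Data.size(); i != e; ++i)
    if (Data[i] != C)
      return -1;
  return static_cast<uint8_t>(C);   // Avoid reporting 0xFF as -1.
}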
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 4d6c28118427..90d511cbab0a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -35,23 +36,8 @@ using namespace llvm;
void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
-
- if (MAI->hasLEB128()) {
- OutStreamer.EmitSLEB128IntValue(Value);
- return;
- }
- // If we don't have .sleb128, emit as .bytes.
- int Sign = Value >> (8 * sizeof(Value) - 1);
- bool IsMore;
-
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
- if (IsMore) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (IsMore);
+ OutStreamer.EmitSLEB128IntValue(Value);
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
@@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- // FIXME: Should we add a PadTo option to the streamer?
- if (MAI->hasLEB128() && PadTo == 0) {
- OutStreamer.EmitULEB128IntValue(Value);
- return;
- }
-
- // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- if (Value || PadTo != 0) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (Value);
-
- if (PadTo) {
- if (PadTo > 1)
- OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
- OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
- }
+ OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
@@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
return 0;
switch (Encoding & 0x07) {
- default: assert(0 && "Invalid encoded value.");
+ default: llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
@@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const {
// On COFF targets, we have to emit the special .secrel32 directive.
- if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ if (MAI->getDwarfSectionOffsetDirective()) {
+ OutStreamer.EmitCOFFSecRel32(Label);
return;
}
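
The fallback loops deleted above are the standard LEB128 byte encodings, now delegated to the streamer. For reference, here is a self-contained sketch of the unsigned variant, equivalent to the removed .bytes loop but without the PadTo handling; the helper name is illustrative.

#include <cstdint>
#include <vector>

// Encode Value as ULEB128: seven payload bits per byte, with the high bit
// set on every byte except the last.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                 // More bytes follow.
    Out.push_back(Byte);
  } while (Value != 0);
}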
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 8eda889155a2..d60585465be0 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
- if (OpNo >= MI->getNumOperands()) {
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 67d927348b54..58fe2ed9d357 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
DIE.cpp
+ DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfCompileUnit.cpp
DwarfDebug.cpp
@@ -11,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter
OcamlGCPrinter.cpp
Win64Exception.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmPrinter
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 9c1ce761b0c5..3776848e3f47 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,15 +112,6 @@ DIE::~DIE() {
delete Children[i];
}
-/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
-///
-DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
- DIEInteger *DI = new (A) DIEInteger(0);
- Values.insert(Values.begin(), DI);
- Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
- return DI;
-}
-
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -174,6 +165,7 @@ void DIE::dump() {
}
#endif
+void DIEValue::anchor() { }
#ifndef NDEBUG
void DIEValue::dump() {
@@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
- default: llvm_unreachable("DIE Value form not supported yet"); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
}
- return 0;
}
#ifndef NDEBUG
void DIEInteger::print(raw_ostream &O) {
- O << "Int: " << (int64_t)Integer
- << format(" 0x%llx", (unsigned long long)Integer);
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// DIEString Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit string value.
-///
-void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
- // Emit nul terminator.
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
-}
-
-#ifndef NDEBUG
-void DIEString::print(raw_ostream &O) {
- O << "Str: \"" << Str << "\"";
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
}
#endif
@@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
///
void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
switch (Form) {
- default: assert(0 && "Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
@@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
- default: llvm_unreachable("Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
}
- return 0;
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 7d61f1edff4a..f93ea1b045b2 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -31,17 +31,17 @@ namespace llvm {
class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
- unsigned Attribute;
+ uint16_t Attribute;
/// Form - Dwarf form code.
///
- unsigned Form;
+ uint16_t Form;
public:
- DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+ DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
// Accessors.
- unsigned getAttribute() const { return Attribute; }
- unsigned getForm() const { return Form; }
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
/// Profile - Used to gather unique data for the abbreviation folding set.
///
@@ -54,41 +54,41 @@ namespace llvm {
class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
- unsigned Tag;
+ uint16_t Tag;
- /// Unique number for node.
+ /// ChildrenFlag - Dwarf children flag.
///
- unsigned Number;
+ uint16_t ChildrenFlag;
- /// ChildrenFlag - Dwarf children flag.
+ /// Unique number for node.
///
- unsigned ChildrenFlag;
+ unsigned Number;
/// Data - Raw data bytes for abbreviation.
///
SmallVector<DIEAbbrevData, 8> Data;
public:
- DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
// Accessors.
- unsigned getTag() const { return Tag; }
+ uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
- unsigned getChildrenFlag() const { return ChildrenFlag; }
+ uint16_t getChildrenFlag() const { return ChildrenFlag; }
const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
- void setTag(unsigned T) { Tag = T; }
- void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setTag(uint16_t T) { Tag = T; }
+ void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
/// AddAttribute - Adds another set of attribute information to the
/// abbreviation.
- void AddAttribute(unsigned Attribute, unsigned Form) {
+ void AddAttribute(uint16_t Attribute, uint16_t Form) {
Data.push_back(DIEAbbrevData(Attribute, Form));
}
/// AddFirstAttribute - Adds a set of attribute information to the front
/// of the abbreviation.
- void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
}
@@ -113,10 +113,6 @@ namespace llvm {
class DIE {
protected:
- /// Abbrev - Buffer for constructing abbreviation.
- ///
- DIEAbbrev Abbrev;
-
/// Offset - Offset in debug info section.
///
unsigned Offset;
@@ -125,6 +121,10 @@ namespace llvm {
///
unsigned Size;
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
/// Children DIEs.
///
std::vector<DIE *> Children;
@@ -139,8 +139,8 @@ namespace llvm {
mutable unsigned IndentCount;
public:
explicit DIE(unsigned Tag)
- : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
- Size(0), Parent (0), IndentCount(0) {}
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
+ IndentCount(0) {}
virtual ~DIE();
// Accessors.
@@ -163,16 +163,6 @@ namespace llvm {
Values.push_back(Value);
}
- /// SiblingOffset - Return the offset of the debug information entry's
- /// sibling.
- unsigned getSiblingOffset() const { return Offset + Size; }
-
- /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
- /// The caller is responsible for deleting the return value at or after the
- /// same time it destroys this DIE.
- ///
- DIEValue *addSiblingOffset(BumpPtrAllocator &A);
-
/// addChild - Add a child to the DIE.
///
void addChild(DIE *Child) {
@@ -195,12 +185,12 @@ namespace llvm {
/// DIEValue - A debug information entry value.
///
class DIEValue {
+ virtual void anchor();
public:
enum {
isInteger,
isString,
isLabel,
- isSectionOffset,
isDelta,
isEntry,
isBlock
@@ -276,33 +266,6 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEString - A string value DIE. This DIE keeps string reference only.
- ///
- class DIEString : public DIEValue {
- const StringRef Str;
- public:
- explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
-
- /// EmitValue - Emit string value.
- ///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
-
- /// SizeOf - Determine size of string value in bytes.
- ///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
- return Str.size() + sizeof(char); // sizeof('\0');
- }
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEString *) { return true; }
- static bool classof(const DIEValue *S) { return S->getType() == isString; }
-
-#ifndef NDEBUG
- virtual void print(raw_ostream &O);
-#endif
- };
-
- //===--------------------------------------------------------------------===//
/// DIELabel - A label expression DIE.
//
class DIELabel : public DIEValue {
@@ -359,7 +322,7 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEntry - A pointer to another debug information entry. An instance of
+ /// DIEEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
class DIEEntry : public DIEValue {
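
The DIE.h changes above narrow the attribute, form, tag and children fields to uint16_t (the DWARF encodings fit easily) and reorder DIE's members, plausibly to shrink these heavily allocated structures. The sketch below only illustrates how the field width affects the footprint of a struct shaped like DIEAbbrevData; the exact sizes depend on the ABI, so this is not a claim about the patch itself.

#include <cstdint>
#include <cstdio>

struct WideAbbrevData   { unsigned Attribute; unsigned Form; };  // old layout
struct NarrowAbbrevData { uint16_t Attribute; uint16_t Form; };  // new layout

int main() {
  // On a typical LP64 ABI this prints 8 and 4: halving the field width
  // halves the per-entry cost of the SmallVector<DIEAbbrevData, 8>.
  std::printf("%zu %zu\n", sizeof(WideAbbrevData), sizeof(NarrowAbbrevData));
  return 0;
}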
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 000000000000..660684d1bea5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,287 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+ llvm_unreachable("invalid AtomType!");
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+ Header(12),
+ HeaderData(atom) {
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) :
+ Header(8 + (atomList.size() * 4)),
+ HeaderData(atomList) {
+}
+
+DwarfAccelTable::~DwarfAccelTable() {
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ delete Data[i];
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI)
+ for (DataArray::iterator DI = EI->second.begin(),
+ DE = EI->second.end(); DI != DE; ++DI)
+ delete (*DI);
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+ // If the string is in the list already then add this die to the list
+ // otherwise add a new one.
+ DataArray &DIEs = Entries[Name];
+ DIEs.push_back(new HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket size, minimum of 1 bucket.
+ if (num > 1024) Header.bucket_count = num/4;
+ if (num > 16) Header.bucket_count = num/2;
+ else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
+
+namespace {
+ // DIESorter - comparison predicate that sorts DIEs by their offset.
+ struct DIESorter {
+ bool operator()(const struct DwarfAccelTable::HashDataContents *A,
+ const struct DwarfAccelTable::HashDataContents *B) const {
+ return A->Die->getOffset() < B->Die->getOffset();
+ }
+ };
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ struct HashData *Entry = new HashData((*EI).getKeyData());
+
+ // Unique the entries.
+ std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter());
+ EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+ EI->second.end());
+
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ Entry->addData((*DI));
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them during the offset
+ // later, we'll emit them when we emit the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. This will look
+// like a list of numbers of how many elements are in each bucket.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the DIEs and the various offsets.
+// Terminate each HashData record with 0 unless its hash collides with the
+// previous one.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getStringPool());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.size());
+ for (std::vector<struct HashDataContents*>::const_iterator
+ DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ DI != DE; ++DI) {
+ // Emit the DIE offset
+ Asm->EmitInt32((*DI)->Die->getOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16((*DI)->Die->getTag());
+ Asm->EmitInt8((*DI)->Flags);
+ }
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
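
Putting the loop above together, each per-name record in the DATA area has roughly this shape (a descriptive sketch only; the tag/flags pair is the FIXME multi-atom branch):

  // Per-name record as written by EmitData:
  //   uint32_t str_offset           ; name's offset in the string pool
  //   uint32_t hash_data_count      ; number of DIEs that carry this name
  //   repeated hash_data_count times:
  //     uint32_t die_offset         ; DW_FORM_data4 DIE offset
  //     uint16_t tag, uint8_t flags ; only if more than one atom is declared
  //   uint32_t 0                    ; terminator, skipped when this record's
  //                                 ; hash collides with the previous one
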
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfDebug *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 000000000000..2278d4c784f4
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,290 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "DIE.h"
+#include <vector>
+#include <map>
+
+// The dwarf accelerator tables are an indirect hash table optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
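
A hedged consumer-side sketch of that lookup, assuming the table has already been read back into the three arrays named below (the function and array names are illustrative, not an LLVM or debugger API):

  #include <cstdint>
  #include <string>
  #include <vector>

  // Hypothetical consumer-side lookup over a table mapped into arrays that
  // mirror the BUCKETS, HASHES and OFFSETS areas. Returns the
  // section-relative offset of the name's data record, or UINT32_MAX.
  static uint32_t lookupName(const std::string &Name,
                             const std::vector<uint32_t> &Buckets,
                             const std::vector<uint32_t> &Hashes,
                             const std::vector<uint32_t> &Offsets) {
    // Same DJB hash the producer uses.
    uint32_t Hash = 5381;
    for (size_t i = 0, e = Name.size(); i != e; ++i)
      Hash = ((Hash << 5) + Hash) + (unsigned char)Name[i];

    uint32_t Bucket = Hash % Buckets.size();
    uint32_t Index = Buckets[Bucket];
    if (Index == UINT32_MAX)   // empty bucket
      return UINT32_MAX;

    // Scan successive hashes while they still land in this bucket.
    for (uint32_t i = Index;
         i < Hashes.size() && Hashes[i] % Buckets.size() == Bucket; ++i)
      if (Hashes[i] == Hash)
        return Offsets[i];     // start of this name's HashData records
    return UINT32_MAX;
  }
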
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
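
As a quick arithmetic check of the update rule above (assuming ASCII input), the first character of a string contributes like this:

  // One worked step of h = ((h << 5) + h) + c starting from 5381:
  //   (5381 << 5) + 5381 + 'A' == 172192 + 5381 + 65 == 177638
  // so a one-character string "A" would hash to 177638.
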
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+  // is a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ enum TypeFlags {
+ eTypeFlagClassMask = 0x0000000fu,
+
+ // Always set for C++, only set for ObjC if this is the
+ // @implementation for a class.
+ eTypeFlagClassIsImplementation = ( 1u << 1 )
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+private:
+ struct TableHeaderData {
+
+ uint32_t die_offset_base;
+ std::vector<Atom> Atoms;
+
+ TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList,
+ uint32_t offset = 0) :
+ die_offset_base(offset) {
+ for (size_t i = 0, e = AtomList.size(); i != e; ++i)
+ Atoms.push_back(AtomList[i]);
+ }
+
+ TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
+ : die_offset_base(offset) {
+ Atoms.push_back(Atom);
+ }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(DIE *D, char Flags) :
+ Die(D),
+ Flags(Flags) { }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+ #endif
+ };
+private:
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ std::vector<struct HashDataContents*> Data; // offsets
+ HashData(StringRef S) : Str(S) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+ void addData(struct HashDataContents *Datum) { Data.push_back(Datum); }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < Data.size(); i++) {
+ O << " Offset: " << Data[i]->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
+ O << " Flags: " << Data[i]->Flags << "\n";
+ }
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<struct HashDataContents*> DataArray;
+ typedef StringMap<DataArray> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+public:
+ DwarfAccelTable(DwarfAccelTable::Atom);
+ DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &);
+ ~DwarfAccelTable();
+ void AddName(StringRef, DIE*, char = 0);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif
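
For orientation, a minimal usage sketch of the public interface above, roughly the pattern DwarfDebug's emitAccelNames follows. Asm, SecBegin, D and MainDIE stand in for a live AsmPrinter, section-start symbol, DwarfDebug and subprogram DIE; they are assumptions, not code from this patch:

  // Build a name table with a single DIE-offset atom, add one name, and emit.
  DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
                                           dwarf::DW_FORM_data4));
  AT.AddName("main", MainDIE);      // MainDIE: DIE* for the subprogram
  AT.FinalizeTable(Asm, "Names");   // uniques entries, buckets them, makes symbols
  AT.Emit(Asm, SecBegin, D);        // header, buckets, hashes, offsets, data
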
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 8ed4f4c43a7c..d975f1f97bea 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() {
Asm->OutStreamer.EmitCFIEndProc();
+ if (!shouldEmitPersonality)
+ return;
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- if (shouldEmitPersonality)
- EmitExceptionTable();
+ EmitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6fe476d02ef7..69dc454ae1d7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,12 +13,14 @@
#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -30,8 +32,9 @@
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
- : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW)
+ : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
Die->addValue(Attribute, Form, Value);
}
-/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
-void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
- StringRef String) {
- DIEValue *Value = new (DIEValueAllocator) DIEString(String);
- Die->addValue(Attribute, Form, Value);
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of immediate strings so that DIEs have
+/// more predictable sizes.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+ MCSymbol *Symb = DD->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DD->getStringPool();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
}
/// addLabel - Add a Dwarf label attribute data and value.
@@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
Die->addValue(Attribute, Form, createDIEEntry(Entry));
}
-
/// addBlock - Add block data.
///
void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
@@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(),
- G.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
if (!SP.Verify())
return;
- // If the line number is 0, don't add it.
- if (SP.getLineNumber() == 0)
- return;
+ // If the line number is 0, don't add it.
unsigned Line = SP.getLineNumber();
- if (!SP.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (Line == 0 || !Ty.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ DIFile File = Ty.getFile();
+ unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
+ File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) {
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
DIType Ty) {
- assert (MO.isImm() && "Invalid machine operand!");
+ assert(MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
int SizeInBits = -1;
bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
@@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
Buffer.addChild(getOrCreateTemplateValueParameterDIE(
DITemplateValueParameter(Element)));
}
-
}
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
if (Context.isType()) {
@@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
constructTypeDIE(*TyDIE, DIDerivedType(Ty));
}
-
+ // If this is a named finished type then include it in the list of types
+ // for the accelerator tables.
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ bool IsImplementation = 0;
+ if (Ty.isCompositeType()) {
+ DICompositeType CT(Ty);
+ // A runtime language of 0 actually means C/C++ and that any
+ // non-negative value is some version of Objective-C/C++.
+ IsImplementation = (CT.getRunTimeLang() == 0) ||
+ CT.isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ?
+ DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ }
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty) {
+void CompileUnit::addType(DIE *Entity, DIType Ty,
+ unsigned Attribute) {
if (!Ty.Verify())
return;
@@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
DIEEntry *Entry = getDIEEntry(Ty);
// If it exists then use the existing value.
if (Entry) {
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
return;
}
@@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
// Set up proxy.
Entry = createDIEEntry(Buffer);
insertDIEEntry(Ty, Entry);
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
// If this is a complete composite type then include it in the
// list of global types.
@@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
StringRef Name = BTy.getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
Buffer.setTag(dwarf::DW_TAG_unspecified_type);
@@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
}
Buffer.setTag(dwarf::DW_TAG_base_type);
- addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
uint64_t Size = BTy.getSizeInBits() >> 3;
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
// Add size if non-zero (derived types might be zero-sized.)
- if (Size)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
// Add source line info if available and TyDesc is not a forward declaration.
@@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.addChild(Arg);
}
}
- // Add prototype flag.
- if (isPrototyped)
+ // Add prototype flag if we're dealing with a C language and the
+ // function has been prototyped.
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
@@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DISubprogram SP(Element);
ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP.isPrivate())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
@@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
- addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- DV.getName());
+ addString(ElemDie, dwarf::DW_AT_name, DV.getName());
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, DV);
- } else if (Element.isDerivedType())
- ElemDie = createMemberDIE(DIDerivedType(Element));
- else
+ } else if (Element.isDerivedType()) {
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ } else if (Element.isObjCProperty()) {
+ DIObjCProperty Property(Element);
+ ElemDie = new DIE(Property.getTag());
+ StringRef PropertyName = Property.getObjCPropertyName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (Property.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (Property.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (Property.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (Property.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (Property.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (Property.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+
+ DIEEntry *Entry = getDIEEntry(Element);
+ if (!Entry) {
+ Entry = createDIEEntry(ElemDie);
+ insertDIEEntry(Element, Entry);
+ }
+ } else
continue;
Buffer.addChild(ElemDie);
}
@@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isAppleBlockExtension())
addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
- unsigned RLang = CTy.getRunTimeLang();
- if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
- dwarf::DW_FORM_data1, RLang);
-
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
@@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
dwarf::DW_FORM_flag, 1);
- if (Tag == dwarf::DW_TAG_class_type)
+ // Add template parameters to a class, structure or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy.getTemplateParams());
break;
@@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- {
+ {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
}
}
@@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
addType(ParamDIE, TP.getType());
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
return ParamDIE;
}
@@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV)
ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
@@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
NDie = new DIE(dwarf::DW_TAG_namespace);
insertDIE(NS, NDie);
- if (!NS.getName().empty())
- addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ if (!NS.getName().empty()) {
+ addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addAccelNamespace(NS.getName(), NDie);
+ } else
+ addAccelNamespace("(anonymous namespace)", NDie);
addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
@@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPDie)
return SPDie;
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIE *DeclDie = NULL;
+ if (SPDecl.isSubprogram()) {
+ DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ }
+
SPDie = new DIE(dwarf::DW_TAG_subprogram);
// DW_TAG_inlined_subroutine may refer to this DIE.
@@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
+ // Unfortunately this code needs to stay here to work around
+ // a bug in older gdbs that requires the linkage name to resolve
+ // multiple template functions.
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// If this DIE is going to refer declaration info using AT_specification
// then there is no need to add other attributes.
- if (SP.getFunctionDeclaration().isSubprogram())
+ if (DeclDie) {
+ // Refer function declaration directly.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ DeclDie);
+
return SPDie;
+ }
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
- addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- SP.getName());
+ addString(SPDie, dwarf::DW_AT_name, SP.getName());
addSourceLine(SPDie, SP);
- if (SP.isPrototyped())
+  // Add the prototype if we have a prototype and we have a C-like
+  // language.
+ if (SP.isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
@@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
unsigned VK = SP.getVirtuality();
if (VK) {
- addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
@@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
insertDIE(N, VariableDIE);
// Add name.
- addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
StringRef LinkageName = GV.getLinkageName();
bool isGlobalVariable = GV.getGlobal() != NULL;
if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// Add type.
DIType GTy = GV.getType();
addType(VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit()) {
+ if (!GV.isLocalToUnit())
addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- // Expose as global.
- addGlobal(GV.getName(), VariableDIE);
- }
+
// Add line number info.
addSourceLine(VariableDIE, GV);
// Add to context owner.
DIDescriptor GVContext = GV.getContext();
addToContextOwner(VariableDIE, GVContext);
// Add location.
+ bool addToAccelTable = false;
+ DIE *VariableSpecDIE = NULL;
if (isGlobalVariable) {
+ addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Block, 0, dwarf::DW_FORM_udata,
@@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
!GVContext.isFile() && !isSubprogramContext(GVContext)) {
// Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
@@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
- }
+ }
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
@@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+ if (addToAccelTable) {
+ DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+ addAccelName(GV.getName(), AddrDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ addAccelName(GV.getLinkageName(), AddrDIE);
+ }
+
return;
}
@@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- int64_t L = SR.getLo();
- int64_t H = SR.getHi();
+ uint64_t L = SR.getLo();
+ uint64_t H = SR.getHi();
// The L value defines the lower bounds which is typically zero for C/C++. The
// H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
@@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy)
return;
}
if (L)
- addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
@@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
StringRef Name = ETy.getName();
- addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(Enumerator, dwarf::DW_AT_name, Name);
int64_t Value = ETy.getEnumValue();
addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
return Enumerator;
@@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
else {
- addString(VariableDie, dwarf::DW_AT_name,
- dwarf::DW_FORM_string, Name);
+ addString(VariableDie, dwarf::DW_AT_name, Name);
addSourceLine(VariableDie, DV->getVariable());
addType(VariableDie, DV->getType());
}
@@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
- addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(MemberDie, dwarf::DW_AT_name, Name);
addType(MemberDie, DT.getTypeDerivedFrom());
@@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
- addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
+ if (MDNode *PNode = DT.getObjCProperty())
+ if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
+
+ // This is only for backward compatibility.
StringRef PropertyName = DT.getObjCPropertyName();
if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
- PropertyName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
StringRef GetterName = DT.getObjCPropertyGetterName();
if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
- dwarf::DW_FORM_string, GetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
StringRef SetterName = DT.getObjCPropertySetterName();
if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
- dwarf::DW_FORM_string, SetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
unsigned PropertyAttributes = 0;
if (DT.isReadOnlyObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 785926579fa4..45e407e27ffa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -29,13 +29,17 @@ class ConstantInt;
class DbgVariable;
//===----------------------------------------------------------------------===//
-/// CompileUnit - This dwarf writer support class manages information associate
+/// CompileUnit - This dwarf writer support class manages information associated
/// with a source file.
class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
+ /// Language - The DW_AT_language of the compile unit
+ ///
+ unsigned Language;
+
/// Die - Compile unit debug information entry.
///
const OwningPtr<DIE> CUDie;
@@ -56,14 +60,17 @@ class CompileUnit {
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// Globals - A map of globally visible named entities for this unit.
- ///
- StringMap<DIE*> Globals;
-
/// GlobalTypes - A map of globally visible types for this unit.
///
StringMap<DIE*> GlobalTypes;
+ /// AccelNames - A map of names for the name accelerator table.
+ ///
+ StringMap<std::vector<DIE*> > AccelNames;
+ StringMap<std::vector<DIE*> > AccelObjC;
+ StringMap<std::vector<DIE*> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -73,27 +80,56 @@ class CompileUnit {
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
public:
- CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
~CompileUnit();
// Accessors.
unsigned getID() const { return ID; }
+ unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
- const StringMap<DIE*> &getGlobals() const { return Globals; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+ const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ return AccelNames;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ return AccelObjC;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ return AccelNamespace;
+ }
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+ &getAccelTypes() const {
+ return AccelTypes;
+ }
+
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
- /// addGlobal - Add a new global entity to the compile unit.
- ///
- void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
-
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
+
+ /// addAccelName - Add a new name to the name accelerator table.
+ void addAccelName(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelObjC(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelNamespace(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+ }
+
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
@@ -150,8 +186,7 @@ public:
/// addString - Add a string attribute data and value.
///
- void addString(DIE *Die, unsigned Attribute, unsigned Form,
- const StringRef Str);
+ void addString(DIE *Die, unsigned Attribute, const StringRef Str);
/// addLabel - Add a Dwarf label attribute data and value.
///
@@ -178,6 +213,7 @@ public:
void addSourceLine(DIE *Die, DISubprogram SP);
void addSourceLine(DIE *Die, DIType Ty);
void addSourceLine(DIE *Die, DINameSpace NS);
+ void addSourceLine(DIE *Die, DIObjCProperty Ty);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
@@ -225,8 +261,10 @@ public:
/// addToContextOwner - Add Die into the list of its context owner's children.
void addToContextOwner(DIE *Die, DIDescriptor Context);
- /// addType - Add a new type attribute to the specified entity.
- void addType(DIE *Entity, DIType Ty);
+ /// addType - Add a new type attribute to the specified entity. This takes
+ /// and attribute parameter because DW_AT_friend attributes are also
+ /// type references.
+ void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1b7e370fca09..cb7887890cda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "dwarfdebug"
#include "DwarfDebug.h"
#include "DIE.h"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
+static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::init(false));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
+
+ // Turn on accelerator tables for Darwin.
+ if (Triple(M->getTargetTriple()).isOSDarwin())
+ DwarfAccelTables = true;
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule(M);
@@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfDebug::~DwarfDebug() {
}
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+MCSymbol *DwarfDebug::getStringPool() {
+ return Asm->GetTempSymbol("section_str");
+}
+
MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
if (Entry.first) return Entry.first;
@@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
return Entry.first = Asm->GetTempSymbol("string", Entry.second);
}
-
/// assignAbbrevNumber - Define a unique number for the abbreviation.
///
void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name)) return false;
+
+ size_t pos = Name.find(')');
+ if (pos != std::string::npos) {
+ if (Name[pos+1] != ' ') return false;
+ return true;
+ }
+ return false;
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+ return;
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
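
A worked example of what these helpers yield for a hypothetical categorized selector; the values below follow mechanically from the slicing code above:

  // For In = "+[NSString(MyCategory) stringWithSpecialString:]":
  //   isObjCClass(In)                       --> true ('+' prefix)
  //   hasObjCCategory(In)                   --> true (')' followed by a space)
  //   getObjCClassCategory(In, Class, Category)
  //       Class    == "NSString"
  //       Category == "NSString(MyCategory)"
  //   getObjCMethodName(In)                 --> "stringWithSpecialString:"
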
+
+// Add the various names to the DWARF accelerator name tables.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+ DIE* Die) {
+ if (!SP.isDefinition()) return;
+
+ TheCU->addAccelName(SP.getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+ TheCU->addAccelName(SP.getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP.getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP.getName(), Class, Category);
+ TheCU->addAccelObjC(Class, Die);
+ if (Category != "")
+ TheCU->addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ }
+}
+
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
@@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
DISubprogram SP(SPNode);
DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (SPDecl.isSubprogram())
- // Refer function declaration directly.
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPCU->getOrCreateSubprogramDIE(SPDecl));
- else {
+ if (!SPDecl.isSubprogram()) {
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
// function then gdb prefers the definition at top level and but does not
@@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
!SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
- SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments.
DICompositeType SPTy = SP.getType();
@@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+  // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ addSubprogramNames(SPCU, SP, SPDie);
+
return SPDie;
}
@@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
@@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
/// of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
- && "LexicalScope does not have instruction markers!");
+ assert(Ranges.empty() == false &&
+ "LexicalScope does not have instruction markers!");
if (!Scope->getScopeNode())
return NULL;
@@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- assert (0 && "Unexpected Start and End labels for a inlined scope!");
- return 0;
+    llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
}
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
I = InlineInfo.find(InlinedSP);
if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
- ScopeDIE));
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
InlinedSPNodes.push_back(InlinedSP);
} else
I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
DILocation DL(Scope->getInlinedAt());
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+  // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
return ScopeDIE;
}
@@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
- SmallVector <DIE *, 8> Children;
+ SmallVector<DIE *, 8> Children;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope))
@@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
ScopeDIE->addChild(*I);
if (DS.isSubprogram())
- TheCU->addPubTypes(DISubprogram(DS));
+ TheCU->addPubTypes(DISubprogram(DS));
- return ScopeDIE;
+ return ScopeDIE;
}
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-
unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
return GetOrCreateSourceID("<stdin>", StringRef());
- // MCStream expects full path name as filename.
- if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
- SmallString<128> FullPathName = DirName;
- sys::path::append(FullPathName, FileName);
- // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
- return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
- }
+ // TODO: this might not belong here. See if we can factor this better.
+ if (DirName == CompilationDir)
+ DirName = "";
- StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
- if (Entry.getValue())
- return Entry.getValue();
+ unsigned SrcId = SourceIdMap.size()+1;
- unsigned SrcId = SourceIdMap.size();
- Entry.setValue(SrcId);
+ // We look up the file/dir pair by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
return SrcId;
}
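
To illustrate the keying scheme above, here is how a few hypothetical calls would map to ids, assuming CompilationDir is not "/tmp" (the ids follow from SourceIdMap.size()+1 and the reuse check):

  //   GetOrCreateSourceID("a.c", "/tmp")  -> 1   key = "/tmp\0a.c"
  //   GetOrCreateSourceID("b.c", "/tmp")  -> 2   key = "/tmp\0b.c"
  //   GetOrCreateSourceID("a.c", "/tmp")  -> 1   existing entry reused
  //   GetOrCreateSourceID("a.c", "")      -> 3   key = "\0a.c", a distinct entry
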
@@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
- StringRef Dir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(FN, Dir);
+ CompilationDir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
- NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
- DIUnit.getProducer());
+ CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
- NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
- // simplifies debug range entries.
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+ NewCU->addString(Die, dwarf::DW_AT_name, FN);
+ // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+ // into an entity.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
- if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
Asm->GetTempSymbol("section_line"));
else
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
- if (!Dir.empty())
- NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
- NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string,
- Flags);
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- unsigned RVer = DIUnit.getRunTimeVersion();
- if (RVer)
+ if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
@@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
/// construct SubprogramDIE - Construct subprogram DIE.
void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
+ CompileUnit *&CURef = SPMap[N];
+ if (CURef)
+ return;
+ CURef = TheCU;
+
DISubprogram SP(N);
if (!SP.isDefinition())
// This is a method declaration which will be handled while constructing
@@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
// Add to context owner.
TheCU->addToContextOwner(SubprogramDie, SP.getContext());
- // Expose as global.
- TheCU->addGlobal(SP.getName(), SubprogramDie);
-
- SPMap[N] = TheCU;
return;
}
@@ -676,7 +758,7 @@ void DwarfDebug::endModule() {
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
- assert (SPCU && "Unable to find Compile Unit!");
+ assert(SPCU && "Unable to find Compile Unit!");
constructSubprogramDIE(SPCU, SP);
DIE *ScopeDIE = SPCU->getDIE(SP);
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
@@ -697,6 +779,13 @@ void DwarfDebug::endModule() {
DIE *ISP = *AI;
FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
@@ -727,9 +816,14 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit info into a debug pubnames section.
- emitDebugPubNames();
-
+ // Emit info into a dwarf accelerator table sections.
+ if (DwarfAccelTables) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
// Emit info into a debug pubtypes section.
emitDebugPubTypes();
@@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
- assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
return MI->getNumOperands() == 3 &&
MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
@@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
if (MI->getOperand(0).isCImm())
return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
- assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
- return DotDebugLocEntry();
+ llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
/// collectVariableInfo - Find variables for each lexical scope.
@@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// The value is valid until the next DBG_VALUE or clobber.
- DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
+ Begin));
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
}
@@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = DWARF2_FLAG_IS_STMT;
+ unsigned Flags = 0;
PrevInstLoc = DL;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END;
PrologEndLoc = DebugLoc();
}
+ if (PrologEndLoc.isUnknown())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
if (!DL.isUnknown()) {
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
@@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
}
/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+/// line number info for the function.
static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
const MDNode *Scope = getScopeNode(DL, Ctx);
DISubprogram SP = getDISubprogram(Scope);
- if (SP.Verify())
- return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ if (SP.Verify()) {
+ // Check the number of operands, since the compatibility check is
+ // cheap here.
+ if (SP->getNumOperands() > 19)
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ else
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ }
+
return DebugLoc();
}
@@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *MI = II;
if (MI->isDebugValue()) {
- assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
// Keep track of user variables.
const MDNode *Var =
@@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
unsigned Reg = *AI; ++AI) {
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
@@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_FLAG_IS_STMT);
+ 0);
}
}
@@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- assert (TheCU && "Unable to find compile unit!");
+ assert(TheCU && "Unable to find compile unit!");
// Construct abstract scopes.
ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
@@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
- if (!DisableFramePointerElim(*MF))
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
@@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Fn = DB.getFilename();
Dir = DB.getDirectory();
} else
- assert(0 && "Unexpected scope info");
+ llvm_unreachable("Unexpected scope info");
Src = GetOrCreateSourceID(Fn, Dir);
}
@@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
- // If not last sibling and has children then add sibling offset attribute.
- if (!Last && !Children.empty())
- Die->addSiblingOffset(DIEValueAllocator);
-
// Record the abbreviation.
assignAbbrevNumber(Die->getAbbrev());
@@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() {
}
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = 0) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem) return 0;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
/// EmitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
void DwarfDebug::EmitSectionLabels() {
@@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
@@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) {
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
switch (Attr) {
- case dwarf::DW_AT_sibling:
- Asm->EmitInt32(Die->getSiblingOffset());
- break;
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
@@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
V->getValue(),
4);
@@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames() {
+/// emitAccelNames - Emit visible names into a hashed accelerator table
+/// section.
+void DwarfDebug::emitAccelNames() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Length of Public Names Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
- Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+ AT.FinalizeTable(Asm, "Names");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
- TheCU->getID()));
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
+/// emitAccelObjC - Emit objective C classes and categories into a hashed
+/// accelerator table section.
+void DwarfDebug::emitAccelObjC() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
- DwarfInfoSectionSym);
+ AT.FinalizeTable(Asm, "ObjC");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelObjCSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
- Asm->GetTempSymbol("info_begin", TheCU->getID()),
- 4);
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- const StringMap<DIE*> &Globals = TheCU->getGlobals();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+/// emitAccelNamespaces - Emit namespace dies into a hashed accelerator
+/// table.
+void DwarfDebug::emitAccelNamespaces() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
- DIE *Entity = GI->second;
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ AT.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelNamespaceSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
- }
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
- TheCU->getID()));
+/// emitAccelTypes() - Emit type dies into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ std::vector<DwarfAccelTable::Atom> Atoms;
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ dwarf::DW_FORM_data2));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ dwarf::DW_FORM_data1));
+ DwarfAccelTable AT(Atoms);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
+ = TheCU->getAccelTypes();
+ for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
+ for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
+ = Entities.begin(), DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI).first, (*DI).second);
+ }
}
+
+ AT.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelTypesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
}
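The emitter loops above all follow the same shape: gather a (name, DIE list) map from every compile unit, then emit one hashed table per kind. A rough standalone sketch of the bucketed-by-hash idea, with a made-up hash and layout (not the actual Apple accelerator table format):

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Illustrative only: names are distributed into hash buckets so a consumer
// can locate the DIE offsets for a name without a linear scan.
struct AccelTableSketch {
  explicit AccelTableSketch(unsigned N) : Buckets(N) {}

  static uint32_t hashName(const std::string &S) { // stand-in hash, not DWARF's
    uint32_t H = 5381;
    for (std::string::size_type I = 0; I < S.size(); ++I)
      H = H * 33 + static_cast<unsigned char>(S[I]);
    return H;
  }

  void addName(const std::string &Name, uint32_t DieOffset) {
    Buckets[hashName(Name) % Buckets.size()].push_back(
        std::make_pair(Name, DieOffset));
  }

  std::vector<std::vector<std::pair<std::string, uint32_t> > > Buckets;
};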
void DwarfDebug::emitDebugPubTypes() {
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
+ // Start the dwarf pubtypes section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubTypesSection());
Asm->OutStreamer.AddComment("Length of Public Types Info");
@@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->EmitInt32(Entity->getOffset());
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ // Emit the name with a terminating null byte.
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
}
@@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() {
// Emit a label for reference from debug information entries.
Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
- // Emit the string itself.
- Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ // Emit the string itself with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+ Entries[i].second->getKeyLength()+1),
+ 0/*addrspace*/);
}
}
@@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() {
/// __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
void DwarfDebug::emitDebugInlineInfo() {
- if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ if (!Asm->MAI->doesDwarfUseInlineInfoSection())
return;
if (!FirstCU)
@@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() {
StringRef Name = SP.getName();
Asm->OutStreamer.AddComment("MIPS linkage name");
- if (LName.empty()) {
- Asm->OutStreamer.EmitBytes(Name, 0);
- Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
- } else
+ if (LName.empty())
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ else
Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 35653be5c897..83f30f5b446f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,7 +30,8 @@
namespace llvm {
class CompileUnit;
-class DbgConcreteScope;
+class ConstantInt;
+class ConstantFP;
class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
@@ -207,8 +208,8 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of directory id and source file
- /// id mapped to a unique id.
+ /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
+ /// separated by a zero byte, mapped to a unique id.
StringMap<unsigned> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
@@ -216,8 +217,6 @@ class DwarfDebug {
StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
unsigned NextStringPoolNumber;
- MCSymbol *getStringPoolEntry(StringRef Str);
-
/// SectionMap - Provides a unique id per text section.
///
UniqueVector<const MCSection*> SectionMap;
@@ -239,12 +238,12 @@ class DwarfDebug {
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
/// InlineInfo - Keep track of inlined functions and their location. This
- /// information is used to populate debug_inlined section.
+ /// information is used to populate the debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
@@ -304,6 +303,10 @@ class DwarfDebug {
MCSymbol *DwarfDebugLocSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ // As an optimization, there is no need to emit an entry in the directory
+ // table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
+
private:
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -340,7 +343,7 @@ private:
/// the start of each one.
void EmitSectionLabels();
- /// emitDIE - Recusively Emits a debug information entry.
+ /// emitDIE - Recursively emits a debug information entry.
///
void emitDIE(DIE *Die);
@@ -365,10 +368,22 @@ private:
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugPubNames - Emit visible names into a debug pubnames section.
- ///
- void emitDebugPubNames();
+ /// emitAccelNames - Emit visible names into a hashed accelerator table
+ /// section.
+ void emitAccelNames();
+
+ /// emitAccelObjC - Emit objective C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// emitAccelNamespaces - Emit namespace dies into a hashed accelerator
+ /// table.
+ void emitAccelNamespaces();
+ /// emitAccelTypes() - Emit type dies into a hashed accelerator table.
+ ///
+ void emitAccelTypes();
+
/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
///
void emitDebugPubTypes();
@@ -407,10 +422,10 @@ private:
/// 3. an unsigned LEB128 number indicating the number of distinct inlining
/// instances for the function.
///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
/// inlined instance; the die_offset points to the inlined_subroutine die in
- /// the __debug_info section, and the low_pc is the starting address for the
- /// inlining instance.
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
void emitDebugInlineInfo();
/// constructCompileUnit - Create new CompileUnit for the given
@@ -426,8 +441,8 @@ private:
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// identifyScopeMarkers() - Indentify instructions that are marking
- /// beginning of or end of a scope.
+ /// identifyScopeMarkers() - Identify instructions that are marking the
+ /// beginning or end of a scope.
void identifyScopeMarkers();
/// addCurrentFnArgument - If Var is an current function argument that add
@@ -472,7 +487,7 @@ public:
void collectInfoFromNamedMDNodes(Module *M);
/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+ /// FIXME - Remove this when DragonEgg switches to DIBuilder.
bool collectLegacyDebugInfo(Module *M);
/// beginModule - Emit all Dwarf sections that should come prior to the
@@ -504,6 +519,13 @@ public:
/// createSubprogramDIE - Create new DIE using SP.
DIE *createSubprogramDIE(DISubprogram SP);
+
+ /// getStringPool - returns the entry into the start of the pool.
+ MCSymbol *getStringPool();
+
+ /// getStringPoolEntry - returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 18b726b173dc..70cc2e56b3e1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
/// CallToNoUnwindFunction - Return `true' if this is a call to a function
/// marked `nounwind'. Return `false' otherwise.
bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "This should be a call instruction!");
+ assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
bool SawFunc = false;
@@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- if (MI->getDesc().isCall())
+ if (MI->isCall())
SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
continue;
}
@@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
if (VerboseAsm) {
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(idx) + " <<");
- Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
- llvm::utostr(idx));
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
}
Asm->EmitULEB128(idx);
@@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" Action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" Action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() {
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" Call between ") +
@@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" On action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" On action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
// Emit comments that decode the action table.
- Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
}
// Type Filter
@@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() {
// type of the catch clauses or the types in the exception specification.
if (VerboseAsm) {
if (Action.ValueForTypeID > 0)
- Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
else if (Action.ValueForTypeID < 0)
- Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
else
Asm->OutStreamer.AddComment(" Cleanup");
}
@@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment(" No further actions");
} else {
unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
- llvm::utostr(NextAction));
+ Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
}
}
Asm->EmitSLEB128(Action.NextAction);
@@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() {
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
if (GV)
Asm->EmitReference(GV, TTypeEncoding);
else
@@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
--Entry;
if (TypeID != 0)
- Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}
Asm->EmitULEB128(TypeID);
@@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..20b1f7b45b31
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AsmPrinter
+parent = Libraries
+required_libraries = Analysis CodeGen Core MC MCParser Support Target
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 75288b0934cb..ef1d2baed9ce 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size",
namespace {
/// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
+ class BranchFolderPass : public MachineFunctionPass {
public:
static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
}
char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
- return new BranchFolderPass(DefaultEnableTailMerge);
-}
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
- return OptimizeFunction(MF,
- MF.getTarget().getInstrInfo(),
- MF.getTarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+ return Folder.OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
ImpDefRegs.insert(SubReg);
++I;
@@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TII = tii;
TRI = tri;
MMI = mmi;
+ RS = NULL;
- RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+ // Use a RegScavenger to help update liveness when required.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+ RS = new RegScavenger();
+ else
+ MRI.invalidateLiveness();
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
@@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
delete RS;
return MadeChange;
}
-
+
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
@@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall())
+ if (I->isCall())
Time += 10;
- else if (MCID.mayLoad() || MCID.mayStore())
+ else if (I->mayLoad() || I->mayStore())
Time += 2;
else
++Time;
@@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
// an object with itself.
#ifndef _GLIBCXX_DEBUG
llvm_unreachable("Predecessor appears twice");
-#endif
+#else
return false;
+#endif
}
}
@@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
break;
}
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
++NumTerms;
}
return NumTerms;
@@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// heuristics.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
- !MBB1->back().getDesc().isBarrier() &&
- !MBB2->back().getDesc().isBarrier())
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
@@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
continue;
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
@@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ // The 1 below can occur as a result of removing blocks in
+ // TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
if (!MBBI->isDebugValue())
break;
}
- return (MBBI->getDesc().isBranch());
+ return (MBBI->isBranch());
}
/// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator MBB2I = --MBB2->end();
while (MBB2I->isDebugValue())
--MBB2I;
- return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. Always use the DebugLoc of the first
+/// branching instruction found unless it's absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
}
/// OptimizeBlock - Analyze and optimize control flow related to the specified
@@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
- DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1065,6 +1095,7 @@ ReoptimizeBlock:
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
@@ -1091,7 +1122,7 @@ ReoptimizeBlock:
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
--PrevBBIter;
MachineBasicBlock::iterator MBBIter = MBB->begin();
- // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
// DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
&& PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
@@ -1103,7 +1134,7 @@ ReoptimizeBlock:
}
}
PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
- PrevBB.removeSuccessor(PrevBB.succ_begin());;
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
assert(PrevBB.succ_empty());
PrevBB.transferSuccessors(MBB);
MadeChange = true;
@@ -1122,6 +1153,7 @@ ReoptimizeBlock:
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
@@ -1135,6 +1167,7 @@ ReoptimizeBlock:
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
@@ -1172,6 +1205,7 @@ ReoptimizeBlock:
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
@@ -1201,6 +1235,7 @@ ReoptimizeBlock:
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
TII->RemoveBranch(*MBB);
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
@@ -1214,6 +1249,7 @@ ReoptimizeBlock:
if (CurTBB && CurCond.empty() && CurFBB == 0 &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
@@ -1256,8 +1292,9 @@ ReoptimizeBlock:
assert(PriorFBB == 0 && "Machine CFG out of date!");
PriorFBB = MBB;
}
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1281,9 +1318,10 @@ ReoptimizeBlock:
bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1343,7 +1381,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
bool IsDef = false;
for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
const MachineOperand &MO = PI->getOperand(i);
+ // If PI has a regmask operand, it is probably a call; don't search past it.
+ if (MO.isRegMask())
+ return Loc;
if (!MO.isReg() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else {
if (Uses.count(Reg)) {
Uses.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Uses.erase(*SR); // Use getSubRegisters to be conservative
}
Defs.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Defs.insert(*AS);
}
}
@@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
bool IsSafe = true;
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
MachineOperand &MO = TIB->getOperand(i);
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
IsSafe = false;
break;
}
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
}
}
if (!IsSafe)
@@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.erase(*OR);
}
@@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.insert(*OR);
}
- HasDups = true;;
+ HasDups = true;
++TIB;
++FIB;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9a5e55160114..21729cd6c380 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,10 +9,9 @@ add_llvm_library(LLVMCodeGen
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DFAPacketizer.cpp
DwarfEHPrepare.cpp
EdgeBundles.cpp
- ELFCodeEmitter.cpp
- ELFWriter.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
@@ -23,6 +22,7 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
+ JITCodeEmitter.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
@@ -37,7 +37,10 @@ add_llvm_library(LLVMCodeGen
LocalStackSlotAllocation.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
+ MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
+ MachineCodeEmitter.cpp
+ MachineCopyPropagation.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
@@ -45,6 +48,7 @@ add_llvm_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineInstr.cpp
+ MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopRanges.cpp
@@ -53,9 +57,9 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
MachineSSAUpdater.cpp
+ MachineScheduler.cpp
MachineSink.cpp
MachineVerifier.cpp
- ObjectCodeEmitter.cpp
OcamlGC.cpp
OptimizePHIs.cpp
PHIElimination.cpp
@@ -66,17 +70,16 @@ add_llvm_library(LLVMCodeGen
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
+ RegAllocBase.cpp
RegAllocBasic.cpp
RegAllocFast.cpp
RegAllocGreedy.cpp
- RegAllocLinearScan.cpp
RegAllocPBQP.cpp
RegisterClassInfo.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
RenderMachineFunction.cpp
ScheduleDAG.cpp
- ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
@@ -87,27 +90,17 @@ add_llvm_library(LLVMCodeGen
Spiller.cpp
SpillPlacement.cpp
SplitKit.cpp
- Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
+ TargetFrameLoweringImpl.cpp
TargetInstrInfoImpl.cpp
TargetLoweringObjectFileImpl.cpp
+ TargetOptionsImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
- VirtRegRewriter.cpp
- )
-
-add_llvm_library_dependencies(LLVMCodeGen
- LLVMAnalysis
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
)
add_subdirectory(SelectionDAG)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 14eb0541dc8d..2b7dfdbe41a0 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI.getOverlaps(Reg);
unsigned Reg = *Alias; ++Alias)
UsedRegs[Reg/32] |= 1 << (Reg&31);
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 424535ba2a1c..a81bb5cc5566 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -19,36 +19,49 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
+ initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
- initializeRALinScanPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 270c337ef67e..c13c05e26a20 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -39,9 +39,6 @@ namespace {
CodePlacementOpt() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Code Placement Optimizer";
- }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
@@ -69,9 +66,9 @@ namespace {
char CodePlacementOpt::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createCodePlacementOptPass() {
- return new CodePlacementOpt();
-}
+char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
+INITIALIZE_PASS(CodePlacementOpt, "code-placement",
+ "Code Placement Optimizer", false, false)
/// HasFallthrough - Test whether the given branch has a fallthrough, either as
/// a plain fallthrough or as a fallthrough case of a conditional branch.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 84c4d59c0e41..bad50103b9c3 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0) {}
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
}
// Clear "do not change" set.
- KeepRegs.clear();
+ KeepRegs.reset();
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void CriticalAntiDepBreaker::FinishBlock() {
RegRefs.clear();
- KeepRegs.clear();
+ KeepRegs.reset();
}
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
@@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
@@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
RegRefs.insert(std::make_pair(Reg, &MO));
if (MO.isUse() && Special) {
- if (KeepRegs.insert(Reg)) {
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
- KeepRegs.insert(*Subreg);
+ KeepRegs.set(*Subreg);
}
}
}
@@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// address updates.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = 0;
+ RegRefs.erase(i);
+ }
+
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
+ KeepRegs.reset(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
+ KeepRegs.reset(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
@@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &CheckOper = MI->getOperand(i);
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
if (!CheckOper.isReg() || !CheckOper.isDef() ||
CheckOper.getReg() != NewReg)
continue;
@@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Keep a map of the MachineInstr*'s back to the SUnit representing them.
// This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
// Find the node at the bottom of the critical path.
@@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
- else if (KeepRegs.count(AntiDepReg))
+ else if (KeepRegs.test(AntiDepReg))
// Don't break anti-dependencies if an use down below requires
// this exact register.
AntiDepReg = 0;
@@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 07107802972d..77462593896e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
@@ -66,7 +65,7 @@ class TargetRegisterInfo;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
- SmallSet<unsigned, 4> KeepRegs;
+ BitVector KeepRegs;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..bfbe7790998f
--- /dev/null
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,223 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// canReserveResources - Check if the resources occupied by a MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by a MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// the schedule() method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT, bool IsPostRA);
+ // Schedule - Actual scheduling work.
+ void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) :
+ ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+ TII = TM.getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr *MIFirst = CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst, MI);
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+ VLIWScheduler->exitRegion();
+
+ // Generate MI -> SU map.
+ //std::map <MachineInstr*, SUnit*> MIToSUnit;
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr *MI = BeginItr;
+
+ this->initPacketizerState();
+
+ // End the current packet if needed.
+ if (this->isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask DFA if machine resource is available for MI.
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ if (ResourceAvail) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
+ MachineInstr *MJ = *VI;
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ // Is it legal to packetize SUI and SUJ together?
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ // Allow packetization if dependency can be pruned.
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ endPacket(MBB, MI);
+ break;
+ } // !isLegalToPruneDependencies.
+ } // !isLegalToPacketizeTogether.
+ } // For all instructions in CurrentPacketMIs.
+ } else {
+ // End the packet if resource is not available.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ BeginItr = this->addToPacket(MI);
+ } // For all instructions in BB.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+}
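
The transition-table layout that ReadTable walks above (DFAStateInputTable as <input, next state> pairs, DFAStateEntryTable as per-state start indices, with one extra entry marking the end) can be exercised with a small standalone sketch. The tables below are invented for illustration and are not the generated target tables:

#include <cassert>
#include <map>
#include <utility>

// DFAStateInputTable[][2]: <input, next state> pairs, grouped per state.
// DFAStateEntryTable[i]:   index of state i's first pair; the final entry
//                          marks the end of the last state's pairs.
static const int DFAStateInputTable[][2] = {
  {1, 1}, {2, 2},   // transitions out of state 0
  {2, 3},           // transitions out of state 1
  {1, 3},           // transitions out of state 2
                    // state 3 has no transitions
};
static const unsigned DFAStateEntryTable[] = {0, 2, 3, 4, 4};

using StateInput = std::pair<unsigned, unsigned>;

struct TinyDFA {
  unsigned CurrentState = 0;
  std::map<StateInput, unsigned> CachedTable;

  // Populate CachedTable with all transitions out of 'state'.
  void readTable(unsigned state) {
    for (unsigned i = DFAStateEntryTable[state],
                  e = DFAStateEntryTable[state + 1]; i != e; ++i)
      CachedTable[{state, (unsigned)DFAStateInputTable[i][0]}] =
          DFAStateInputTable[i][1];
  }

  // Is there a legal transition on 'input' from the current state?
  bool canReserve(unsigned input) {
    readTable(CurrentState);
    return CachedTable.count({CurrentState, input}) != 0;
  }

  // Take the transition, i.e. commit the instruction to the packet.
  void reserve(unsigned input) {
    readTable(CurrentState);
    assert(CachedTable.count({CurrentState, input}));
    CurrentState = CachedTable[{CurrentState, input}];
  }
};

int main() {
  TinyDFA D;
  assert(D.canReserve(1));   // the empty-packet state accepts input 1
  D.reserve(1);              // state 0 -> 1
  assert(D.canReserve(2));   // state 1 still accepts input 2
  D.reserve(2);              // state 1 -> 3
  assert(!D.canReserve(1));  // state 3 has no transitions: packet is full
  return 0;
}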
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6de6c0cb81bd..aa10d1d41f2b 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
+ BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -45,14 +46,11 @@ namespace {
};
}
char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
"Remove dead machine instructions", false, false)
-FunctionPass *llvm::createDeadMachineInstructionElimPass() {
- return new DeadMachineInstructionElim();
-}
-
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Technically speaking inline asm without side effects and no defs can still
// be deleted. But there is so much bad inline asm code out there, we should
@@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
- LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
- // This def has a non-debug use. Don't delete the instruction!
- return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
}
}
}
@@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
// Treat reserved registers as always live.
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ if (!MBB->empty() && MBB->back().isReturn())
for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
unsigned Reg = *LOI;
@@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
*SubRegs; ++SubRegs)
LivePhysRegs.reset(*SubRegs);
}
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
}
}
// Record the physreg uses, after the defs, in case a physreg is
@@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
- for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ for (const uint16_t *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
LivePhysRegs.set(*AliasSet);
}
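
The new regmask handling above relies on the convention that a register-mask operand lists the registers preserved across a call; every register not in the mask is clobbered, so its bit can be dropped from the live set (which is what clearBitsNotInMask does). A standalone sketch of that operation over a plain std::bitset, with invented register numbers:

#include <bitset>
#include <cassert>
#include <cstdint>

constexpr unsigned NumPhysRegs = 64;

// Clear the live bit of every register that is *not* preserved by the mask.
void clearBitsNotInMask(std::bitset<NumPhysRegs> &Live, uint64_t PreservedMask) {
  for (unsigned Reg = 0; Reg != NumPhysRegs; ++Reg)
    if (!(PreservedMask & (uint64_t(1) << Reg)))
      Live.reset(Reg);   // clobbered by the call: no longer live
}

int main() {
  std::bitset<NumPhysRegs> LivePhysRegs;
  LivePhysRegs.set(3);                       // hypothetical callee-saved register
  LivePhysRegs.set(10);                      // hypothetical caller-saved register

  uint64_t PreservedMask = uint64_t(1) << 3; // only register 3 survives the call
  clearBitsNotInMask(LivePhysRegs, PreservedMask);

  assert(LivePhysRegs.test(3));              // preserved register stays live
  assert(!LivePhysRegs.test(10));            // clobbered register is now dead
  return 0;
}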
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index ed9e409d3e5a..944dd4fb41c8 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -28,98 +28,34 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
-STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
-STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
-STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
-STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
const TargetLowering *TLI;
- // The eh.exception intrinsic.
- Function *ExceptionValueIntrinsic;
-
- // The eh.selector intrinsic.
- Function *SelectorIntrinsic;
-
- // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
- Constant *URoR;
-
- // The EH language-specific catch-all type.
- GlobalVariable *EHCatchAllValue;
-
- // _Unwind_Resume or the target equivalent.
+ // RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
- // We both use and preserve dominator info.
- DominatorTree *DT;
-
- // The function we are running on.
- Function *F;
-
- // The landing pads for this function.
- typedef SmallPtrSet<BasicBlock*, 8> BBSet;
- BBSet LandingPads;
-
- bool InsertUnwindResumeCalls();
-
- bool NormalizeLandingPads();
- bool LowerUnwindsAndResumes();
- bool MoveExceptionValueCalls();
-
- Instruction *CreateExceptionValueCall(BasicBlock *BB);
-
- /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the "llvm.eh.catch.all.value" call need to convert to using its
- /// initializer instead.
- bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
-
- bool HasCatchAllInSelector(IntrinsicInst *);
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Instruction *GetExceptionObject(ResumeInst *RI);
- /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
- void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
-
- /// FindAllURoRInvokes - Find all URoR invokes in the function.
- void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
-
- /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
- /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
- /// a landing pad within the current function. This is a candidate to merge
- /// the selector associated with the URoR invoke with the one from the
- /// URoR's landing pad.
- bool HandleURoRInvokes();
-
- /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
- /// with the eh.exception call. This recursively looks past instructions
- /// which don't change the EH pointer value, like casts or PHI nodes.
- bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs);
-
public:
static char ID; // Pass identification, replacement for typeid.
DwarfEHPrepare(const TargetMachine *tm) :
FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
- ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
- URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ RewindFunction(0) {
initializeDominatorTreePass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &Fn);
- // getAnalysisUsage - We need the dominator tree for handling URoR.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
const char *getPassName() const {
return "Exception handling preparation";
}
-
};
} // end anonymous namespace
@@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
return new DwarfEHPrepare(tm);
}
-/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
-/// catch-all.
-bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
- if (!EHCatchAllValue) return false;
-
- unsigned ArgIdx = II->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
- return GV == EHCatchAllValue;
-}
-
-/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
-void DwarfEHPrepare::
-FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
- for (Value::use_iterator
- I = SelectorIntrinsic->use_begin(),
- E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *II = cast<IntrinsicInst>(*I);
-
- if (II->getParent()->getParent() != F)
- continue;
-
- if (!HasCatchAllInSelector(II))
- Sels.insert(II);
- else
- CatchAllSels.insert(II);
- }
-}
-
-/// FindAllURoRInvokes - Find all URoR invokes in the function.
-void DwarfEHPrepare::
-FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
- for (Value::use_iterator
- I = URoR->use_begin(),
- E = URoR->use_end(); I != E; ++I) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
- URoRInvokes.insert(II);
- }
-}
-
-/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the "llvm.eh.catch.all.value" call need to convert to using its
-/// initializer instead.
-bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
- if (!EHCatchAllValue) return false;
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- bool Changed = false;
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- I = Sels.begin(), E = Sels.end(); I != E; ++I) {
- IntrinsicInst *Sel = *I;
-
- // Index of the "llvm.eh.catch.all.value" variable.
- unsigned OpIdx = Sel->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
- if (GV != EHCatchAllValue) continue;
- Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
- Changed = true;
- }
-
- return Changed;
-}
-
-/// FindSelectorAndURoR - Find the eh.selector call associated with the
-/// eh.exception call. And indicate if there is a URoR "invoke" associated with
-/// the eh.exception call. This recursively looks past instructions which don't
-/// change the EH pointer value, like casts or PHI nodes.
-bool
-DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs) {
- bool Changed = false;
-
- for (Value::use_iterator
- I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
- Instruction *II = dyn_cast<Instruction>(*I);
- if (!II || II->getParent()->getParent() != F) continue;
-
- if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
- if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
- SelCalls.insert(Sel);
- } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
- if (Invoke->getCalledFunction() == URoR)
- URoRInvoke = true;
- } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
- Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
- } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
- if (SeenPHIs.insert(PN))
- // Don't process a PHI node more than once.
- Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
- }
- }
-
- return Changed;
-}
-
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
-/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
-/// landing pad within the current function. This is a candidate to merge the
-/// selector associated with the URoR invoke with the one from the URoR's
-/// landing pad.
-bool DwarfEHPrepare::HandleURoRInvokes() {
- if (!EHCatchAllValue) {
- EHCatchAllValue =
- F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
- if (!EHCatchAllValue) return false;
- }
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- SmallPtrSet<IntrinsicInst*, 32> Sels;
- SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
- FindAllCleanupSelectors(Sels, CatchAllSels);
-
- if (!URoR) {
- URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors(CatchAllSels);
- }
-
- SmallPtrSet<InvokeInst*, 32> URoRInvokes;
- FindAllURoRInvokes(URoRInvokes);
-
- SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
-
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
- const BasicBlock *SelBB = (*SI)->getParent();
- for (SmallPtrSet<InvokeInst*, 32>::iterator
- UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
- const BasicBlock *URoRBB = (*UI)->getParent();
- if (DT->dominates(SelBB, URoRBB)) {
- SelsToConvert.insert(*SI);
- break;
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Instruction *ExnObj = 0;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = 0;
+ InsertValueInst *ExcIVI = 0;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = cast<Instruction>(ExcIVI->getOperand(1));
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
}
}
}
- bool Changed = false;
-
- if (Sels.size() != SelsToConvert.size()) {
- // If we haven't been able to convert all of the clean-up selectors, then
- // loop through the slow way to see if they still need to be converted.
- if (!ExceptionValueIntrinsic) {
- ExceptionValueIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
- if (!ExceptionValueIntrinsic)
- return CleanupSelectors(CatchAllSels);
- }
-
- for (Value::use_iterator
- I = ExceptionValueIntrinsic->use_begin(),
- E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
- if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
-
- bool URoRInvoke = false;
- SmallPtrSet<IntrinsicInst*, 8> SelCalls;
- SmallPtrSet<PHINode*, 32> SeenPHIs;
- Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
-
- if (URoRInvoke) {
- // This EH pointer is being used by an invoke of an URoR instruction and
- // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
- // need to convert it to a 'catch-all'.
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
- if (!HasCatchAllInSelector(*SI))
- SelsToConvert.insert(*SI);
- }
- }
- }
-
- if (!SelsToConvert.empty()) {
- // Convert all clean-up eh.selectors, which are associated with "invokes" of
- // URoR calls, into catch-all eh.selectors.
- Changed = true;
-
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelsToConvert.begin(), SE = SelsToConvert.end();
- SI != SE; ++SI) {
- IntrinsicInst *II = *SI;
-
- // Use the exception object pointer and the personality function
- // from the original selector.
- CallSite CS(II);
- IntrinsicInst::op_iterator I = CS.arg_begin();
- IntrinsicInst::op_iterator E = CS.arg_end();
- IntrinsicInst::op_iterator B = prior(E);
-
- // Exclude last argument if it is an integer.
- if (isa<ConstantInt>(B)) E = B;
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
- // Add exception object pointer (front).
- // Add personality function (next).
- // Add in any filter IDs (rest).
- SmallVector<Value*, 8> Args(I, E);
+ RI->eraseFromParent();
- Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
-
- CallInst *NewSelector =
- CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
-
- NewSelector->setTailCall(II->isTailCall());
- NewSelector->setAttributes(II->getAttributes());
- NewSelector->setCallingConv(II->getCallingConv());
-
- II->replaceAllUsesWith(NewSelector);
- II->eraseFromParent();
- }
+ if (EraseIVIs) {
+ if (SelIVI->getNumUses() == 0)
+ SelIVI->eraseFromParent();
+ if (ExcIVI->getNumUses() == 0)
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->getNumUses() == 0)
+ SelLoad->eraseFromParent();
}
- Changed |= CleanupSelectors(CatchAllSels);
- return Changed;
-}
-
-/// NormalizeLandingPads - Normalize and discover landing pads, noting them
-/// in the LandingPads set. A landing pad is normal if the only CFG edges
-/// that end at it are unwind edges from invoke instructions. If we inlined
-/// through an invoke we could have a normal branch from the previous
-/// unwind block through to the landing pad for the original invoke.
-/// Abnormal landing pads are fixed up by redirecting all unwind edges to
-/// a new basic block which falls through to the original.
-bool DwarfEHPrepare::NormalizeLandingPads() {
- bool Changed = false;
-
- const MCAsmInfo *MAI = TM->getMCAsmInfo();
- bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
-
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (!isa<InvokeInst>(TI))
- continue;
- BasicBlock *LPad = TI->getSuccessor(1);
- // Skip landing pads that have already been normalized.
- if (LandingPads.count(LPad))
- continue;
-
- // Check that only invoke unwind edges end at the landing pad.
- bool OnlyUnwoundTo = true;
- bool SwitchOK = usingSjLjEH;
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
- PI != PE; ++PI) {
- TerminatorInst *PT = (*PI)->getTerminator();
- // The SjLj dispatch block uses a switch instruction. This is effectively
- // an unwind edge, so we can disregard it here. There will only ever
- // be one dispatch, however, so if there are multiple switches, one
- // of them truly is a normal edge, not an unwind edge.
- if (SwitchOK && isa<SwitchInst>(PT)) {
- SwitchOK = false;
- continue;
- }
- if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
- OnlyUnwoundTo = false;
- break;
- }
- }
-
- if (OnlyUnwoundTo) {
- // Only unwind edges lead to the landing pad. Remember the landing pad.
- LandingPads.insert(LPad);
- continue;
- }
-
- // At least one normal edge ends at the landing pad. Redirect the unwind
- // edges to a new basic block which falls through into this one.
-
- // Create the new basic block.
- BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
- LPad->getName() + "_unwind_edge");
-
- // Insert it into the function right before the original landing pad.
- LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
-
- // Redirect unwind edges from the original landing pad to NewBB.
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
- TerminatorInst *PT = (*PI++)->getTerminator();
- if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
- // Unwind to the new block.
- PT->setSuccessor(1, NewBB);
- }
-
- // If there are any PHI nodes in LPad, we need to update them so that they
- // merge incoming values from NewBB instead.
- for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
-
- // Check to see if all of the values coming in via unwind edges are the
- // same. If so, we don't need to create a new PHI node.
- Value *InVal = PN->getIncomingValueForBlock(*PB);
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
- InVal = 0;
- break;
- }
- }
-
- if (InVal == 0) {
- // Different unwind edges have different values. Create a new PHI node
- // in NewBB.
- PHINode *NewPN = PHINode::Create(PN->getType(),
- PN->getNumIncomingValues(),
- PN->getName()+".unwind", NewBB);
- // Add an entry for each unwind edge, using the value from the old PHI.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
-
- // Now use this new PHI as the common incoming value for NewBB in PN.
- InVal = NewPN;
- }
-
- // Revector exactly one entry in the PHI node to come from NewBB
- // and delete all other entries that come from unwind edges. If
- // there are both normal and unwind edges from the same predecessor,
- // this leaves an entry for the normal edge.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- PN->removeIncomingValue(*PI);
- PN->addIncoming(InVal, NewBB);
- }
-
- // Add a fallthrough from NewBB to the original landing pad.
- BranchInst::Create(LPad, NewBB);
-
- // Now update DominatorTree analysis information.
- DT->splitBlock(NewBB);
-
- // Remember the newly constructed landing pad. The original landing pad
- // LPad is no longer a landing pad now that all unwind edges have been
- // revectored to NewBB.
- LandingPads.insert(NewBB);
- ++NumLandingPadsSplit;
- Changed = true;
- }
-
- return Changed;
-}
-
-/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
-/// rethrowing any previously caught exception. This will crash horribly
-/// at runtime if there is no such exception: using unwind to throw a new
-/// exception is currently not supported.
-bool DwarfEHPrepare::LowerUnwindsAndResumes() {
- SmallVector<Instruction*, 16> ResumeInsts;
-
- for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
- for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
- if (isa<UnwindInst>(bi))
- ResumeInsts.push_back(bi);
- else if (CallInst *call = dyn_cast<CallInst>(bi))
- if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
- if (fn->getName() == "llvm.eh.resume")
- ResumeInsts.push_back(bi);
- }
- }
-
- if (ResumeInsts.empty()) return false;
-
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- LLVMContext &Ctx = ResumeInsts[0]->getContext();
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
- }
-
- bool Changed = false;
-
- for (SmallVectorImpl<Instruction*>::iterator
- I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
- Instruction *RI = *I;
-
- // Replace the resuming instruction with a call to _Unwind_Resume (or the
- // appropriate target equivalent).
-
- llvm::Value *ExnValue;
- if (isa<UnwindInst>(RI))
- ExnValue = CreateExceptionValueCall(RI->getParent());
- else
- ExnValue = cast<CallInst>(RI)->getArgOperand(0);
-
- // Create the call...
- CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
-
- // ...followed by an UnreachableInst, if it was an unwind.
- // Calls to llvm.eh.resume are typically already followed by this.
- if (isa<UnwindInst>(RI))
- new UnreachableInst(RI->getContext(), RI);
-
- if (isa<UnwindInst>(RI))
- ++NumUnwindsLowered;
- else
- ++NumResumesLowered;
-
- // Nuke the resume instruction.
- RI->eraseFromParent();
-
- Changed = true;
- }
-
- return Changed;
-}
-
-/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
-/// landing pads by replacing calls outside of landing pads with direct use of
-/// a register holding the appropriate value; this requires adding calls inside
-/// all landing pads to initialize the register. Also, move eh.exception calls
-/// inside landing pads to the start of the landing pad (optional, but may make
-/// things simpler for later passes).
-bool DwarfEHPrepare::MoveExceptionValueCalls() {
- // If the eh.exception intrinsic is not declared in the module then there is
- // nothing to do. Speed up compilation by checking for this common case.
- if (!ExceptionValueIntrinsic &&
- !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
- return false;
-
- bool Changed = false;
-
- // Move calls to eh.exception that are inside a landing pad to the start of
- // the landing pad.
- for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI) {
- BasicBlock *LP = *LI;
- for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception.
- if (!EI->use_empty()) {
- // If there is already a call to eh.exception at the start of the
- // landing pad, then get hold of it; otherwise create such a call.
- Value *CallAtStart = CreateExceptionValueCall(LP);
-
- // If the call was at the start of a landing pad then leave it alone.
- if (EI == CallAtStart)
- continue;
- EI->replaceAllUsesWith(CallAtStart);
- }
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- Changed = true;
- }
- }
-
- // Look for calls to eh.exception that are not in a landing pad. If one is
- // found, then a register that holds the exception value will be created in
- // each landing pad, and the SSAUpdater will be used to compute the values
- // returned by eh.exception calls outside of landing pads.
- SSAUpdater SSA;
-
- // Remember where we found the eh.exception call, to avoid rescanning earlier
- // basic blocks which we already know contain no eh.exception calls.
- bool FoundCallOutsideLandingPad = false;
- Function::iterator BB = F->begin();
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II)
- if (isa<EHExceptionInst>(II)) {
- SSA.Initialize(II->getType(), II->getName());
- FoundCallOutsideLandingPad = true;
- break;
- }
-
- if (FoundCallOutsideLandingPad)
- break;
- }
-
- // If all calls to eh.exception are in landing pads then we are done.
- if (!FoundCallOutsideLandingPad)
- return Changed;
-
- // Add a call to eh.exception at the start of each landing pad, and tell the
- // SSAUpdater that this is the value produced by the landing pad.
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI)
- SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
-
- // Now turn all calls to eh.exception that are not in a landing pad into a use
- // of the appropriate register.
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception, replace it with the value from any
- // upstream landing pad(s).
- EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- }
- }
-
- return true;
-}
-
-/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
-/// the start of the basic block (unless there already is one, in which case
-/// the existing call is returned).
-Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
- Instruction *Start = BB->getFirstNonPHIOrDbg();
- // Is this a call to eh.exception?
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
- if (CI->getIntrinsicID() == Intrinsic::eh_exception)
- // Reuse the existing call.
- return Start;
-
- // Find the eh.exception intrinsic if we didn't already.
- if (!ExceptionValueIntrinsic)
- ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::eh_exception);
-
- // Create the call.
- return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+ return ExnObj;
}
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
-bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool UsesNewEH = false;
SmallVector<ResumeInst*, 16> Resumes;
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
Resumes.push_back(RI);
@@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
}
// Create the basic block where the _Unwind_Resume call will live.
- LLVMContext &Ctx = F->getContext();
- BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(),
+ LLVMContext &Ctx = Fn.getContext();
+ unsigned ResumesSize = Resumes.size();
+
+ if (ResumesSize == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Instruction *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
- BasicBlock *UnwindBBDom = Resumes[0]->getParent();
for (SmallVectorImpl<ResumeInst*>::iterator
I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
ResumeInst *RI = *I;
- BranchInst::Create(UnwindBB, RI->getParent());
- ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0),
- 0, "exn.obj", RI);
- PN->addIncoming(ExnObj, RI->getParent());
- UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom);
- RI->eraseFromParent();
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Instruction *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
}
// Call the function.
@@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
// We never expect _Unwind_Resume to return.
new UnreachableInst(Ctx, UnwindBB);
-
- // Now update DominatorTree analysis information.
- DT->addNewBlock(UnwindBB, UnwindBBDom);
return true;
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- bool Changed = false;
-
- // Initialize internal state.
- DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH.
- F = &Fn;
-
- if (InsertUnwindResumeCalls()) {
- // FIXME: The reset of this function can go once the new EH is done.
- LandingPads.clear();
- return true;
- }
-
- // Ensure that only unwind edges end at landing pads (a landing pad is a
- // basic block where an invoke unwind edge ends).
- Changed |= NormalizeLandingPads();
-
- // Turn unwind instructions and eh.resume calls into libcalls.
- Changed |= LowerUnwindsAndResumes();
-
- // TODO: Move eh.selector calls to landing pads and combine them.
-
- // Move eh.exception calls to landing pads.
- Changed |= MoveExceptionValueCalls();
-
- Changed |= HandleURoRInvokes();
-
- LandingPads.clear();
-
+ bool Changed = InsertUnwindResumeCalls(Fn);
return Changed;
}
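
The rewritten InsertUnwindResumeCalls above has two shapes: with a single resume it appends the _Unwind_Resume call (followed by unreachable) directly to that block, and with several resumes it funnels them through one shared block whose phi merges the per-block exception objects. A toy standalone sketch of that decision, using made-up block and value names instead of real IR:

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Toy "module" of named blocks holding pseudo-instruction strings.
struct ToyBlock {
  std::string Name;
  std::vector<std::string> Code;
};

// Lower all resume sites; each site is (block, exception-object name).
void lowerResumes(std::vector<ToyBlock> &Blocks,
                  const std::vector<std::pair<ToyBlock *, std::string>> &Resumes) {
  if (Resumes.empty())
    return;

  if (Resumes.size() == 1) {
    // Single resume: emit the rewind call directly in that block.
    ToyBlock *BB = Resumes.front().first;
    BB->Code.push_back("call _Unwind_Resume(" + Resumes.front().second + ")");
    BB->Code.push_back("unreachable");
    return;
  }

  // Multiple resumes: funnel them through one shared block with a phi.
  ToyBlock Unwind{"unwind_resume", {}};
  std::string Phi = "phi [";
  for (auto &R : Resumes) {
    R.first->Code.push_back("br unwind_resume");
    Phi += " " + R.second + " from " + R.first->Name + ",";
  }
  Phi += " ]";
  Unwind.Code.push_back(Phi);
  Unwind.Code.push_back("call _Unwind_Resume(phi)");
  Unwind.Code.push_back("unreachable");
  Blocks.push_back(Unwind);
}

int main() {
  std::vector<ToyBlock> Blocks = {{"bb1", {}}, {"bb2", {}}};
  lowerResumes(Blocks, {{&Blocks[0], "exn1"}, {&Blocks[1], "exn2"}});
  assert(Blocks.size() == 3 && Blocks.back().Name == "unwind_resume");
  return 0;
}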
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
deleted file mode 100644
index 5b634682cc87..000000000000
--- a/lib/CodeGen/ELF.h
+++ /dev/null
@@ -1,227 +0,0 @@
-//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header contains common, non-processor-specific data structures and
-// constants for the ELF file format.
-//
-// The details of the ELF32 bits in this file are largely based on the Tool
-// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
-// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the
-// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ELF_H
-#define CODEGEN_ELF_H
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
- class GlobalValue;
-
- /// ELFSym - This struct contains information about each symbol that is
- /// added to logical symbol table for the module. This is eventually
- /// turned into a real symbol table in the file.
- struct ELFSym {
-
- // ELF symbols are related to llvm ones by being one of the two llvm
- // types, for the other ones (section, file, func) a null pointer is
- // assumed by default.
- union {
- const GlobalValue *GV; // If this is a pointer to a GV
- const char *Ext; // If this is a pointer to a named symbol
- } Source;
-
- // Describes from which source type this ELF symbol comes from,
- // they can be GlobalValue, ExternalSymbol or neither.
- enum {
- isGV, // The Source.GV field is valid.
- isExtSym, // The Source.ExtSym field is valid.
- isOther // Not a GlobalValue or External Symbol
- };
- unsigned SourceType;
-
- bool isGlobalValue() const { return SourceType == isGV; }
- bool isExternalSym() const { return SourceType == isExtSym; }
-
- // getGlobalValue - If this is a global value which originated the
- // elf symbol, return a reference to it.
- const GlobalValue *getGlobalValue() const {
- assert(SourceType == isGV && "This is not a global value");
- return Source.GV;
- }
-
- // getExternalSym - If this is an external symbol which originated the
- // elf symbol, return a reference to it.
- const char *getExternalSymbol() const {
- assert(SourceType == isExtSym && "This is not an external symbol");
- return Source.Ext;
- }
-
- // getGV - From a global value return a elf symbol to represent it
- static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
- unsigned Type, unsigned Visibility) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(Type);
- Sym->setVisibility(Visibility);
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // getExtSym - Create and return an elf symbol to represent an
- // external symbol
- static ELFSym *getExtSym(const char *Ext) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.Ext = Ext;
- Sym->setBind(ELF::STB_GLOBAL);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isExtSym;
- return Sym;
- }
-
- // getSectionSym - Returns a elf symbol to represent an elf section
- static ELFSym *getSectionSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_SECTION);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getFileSym - Returns a elf symbol to represent the module identifier
- static ELFSym *getFileSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_FILE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getUndefGV - Returns a STT_NOTYPE symbol
- static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // ELF specific fields
- unsigned NameIdx; // Index in .strtab of name, once emitted.
- uint64_t Value;
- unsigned Size;
- uint8_t Info;
- uint8_t Other;
- unsigned short SectionIdx;
-
- // Symbol index into the Symbol table
- unsigned SymTabIdx;
-
- ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
- Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
- SymTabIdx(0) {}
-
- unsigned getBind() const { return (Info >> 4) & 0xf; }
- unsigned getType() const { return Info & 0xf; }
- bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
- bool isFileType() const { return getType() == ELF::STT_FILE; }
-
- void setBind(unsigned X) {
- assert(X == (X & 0xF) && "Bind value out of range!");
- Info = (Info & 0x0F) | (X << 4);
- }
-
- void setType(unsigned X) {
- assert(X == (X & 0xF) && "Type value out of range!");
- Info = (Info & 0xF0) | X;
- }
-
- void setVisibility(unsigned V) {
- assert(V == (V & 0x3) && "Visibility value out of range!");
- Other = V;
- }
- };
-
- /// ELFSection - This struct contains information about each section that is
- /// emitted to the file. This is eventually turned into the section header
- /// table at the end of the file.
- class ELFSection : public BinaryObject {
- public:
- // ELF specific fields
- unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
- unsigned Type; // sh_type - Section contents & semantics
- unsigned Flags; // sh_flags - Section flags.
- uint64_t Addr; // sh_addr - The mem addr this section is in.
- unsigned Offset; // sh_offset - Offset from the file start
- unsigned Size; // sh_size - The section size.
- unsigned Link; // sh_link - Section header table index link.
- unsigned Info; // sh_info - Auxiliary information.
- unsigned Align; // sh_addralign - Alignment of section.
- unsigned EntSize; // sh_entsize - Size of entries in the section e
-
- /// SectionIdx - The number of the section in the Section Table.
- unsigned short SectionIdx;
-
- /// Sym - The symbol to represent this section if it has one.
- ELFSym *Sym;
-
- /// getSymIndex - Returns the symbol table index of the symbol
- /// representing this section.
- unsigned getSymbolTableIndex() const {
- assert(Sym && "section not present in the symbol table");
- return Sym->SymTabIdx;
- }
-
- ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
- : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
- Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
- };
-
- /// ELFRelocation - This class contains all the information necessary to
- /// to generate any 32-bit or 64-bit ELF relocation entry.
- class ELFRelocation {
- uint64_t r_offset; // offset in the section of the object this applies to
- uint32_t r_symidx; // symbol table index of the symbol to use
- uint32_t r_type; // machine specific relocation type
- int64_t r_add; // explicit relocation addend
- bool r_rela; // if true then the addend is part of the entry
- // otherwise the addend is at the location specified
- // by r_offset
- public:
- uint64_t getInfo(bool is64Bit) const {
- if (is64Bit)
- return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
- else
- return (r_symidx << 8) + (r_type & 0xFFL);
- }
-
- uint64_t getOffset() const { return r_offset; }
- int64_t getAddend() const { return r_add; }
-
- ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
- bool rela = true, int64_t addend = 0) :
- r_offset(off), r_symidx(sym), r_type(type),
- r_add(addend), r_rela(rela) {}
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
deleted file mode 100644
index 660424c3c141..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfce"
-
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-//===----------------------------------------------------------------------===//
-// ELFCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// startFunction - This callback is invoked when a new machine function is
-/// about to be emitted.
-void ELFCodeEmitter::startFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "processing function: "
- << MF.getFunction()->getName() << "\n");
-
- // Get the ELF Section that this function belongs in.
- ES = &EW.getTextSection(MF.getFunction());
-
- // Set the desired binary object to be used by the code emitters
- setBinaryObject(ES);
-
- // Get the function alignment in bytes
- unsigned Align = (1 << MF.getAlignment());
-
- // The function must start on its required alignment
- ES->emitAlignment(Align);
-
- // Update the section alignment if needed.
- ES->Align = std::max(ES->Align, Align);
-
- // Record the function start offset
- FnStartOff = ES->getCurrentPCOffset();
-
- // Emit constant pool and jump tables to their appropriate sections.
- // They need to be emitted before the function because in some targets
- // the later may reference JT or CP entry address.
- emitConstantPool(MF.getConstantPool());
- if (MF.getJumpTableInfo())
- emitJumpTables(MF.getJumpTableInfo());
-}
-
-/// finishFunction - This callback is invoked after the function is completely
-/// finished.
-bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
- // Add a symbol to represent the function.
- const Function *F = MF.getFunction();
- ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
- EW.getGlobalELFVisibility(F));
- FnSym->SectionIdx = ES->SectionIdx;
- FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
- EW.AddPendingGlobalSymbol(F, true);
-
- // Offset from start of Section
- FnSym->Value = FnStartOff;
-
- if (!F->hasPrivateLinkage())
- EW.SymbolList.push_back(FnSym);
-
- // Patch up Jump Table Section relocations to use the real MBBs offsets
- // now that the MBB label offsets inside the function are known.
- if (MF.getJumpTableInfo()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
- MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
- uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setResultPointer((void*)MBBOffset);
- MR.setConstantVal(ES->SectionIdx);
- JTSection.addRelocation(MR);
- }
- }
-
- // If we have emitted any relocations to function-specific objects such as
- // basic blocks, constant pools entries, or jump tables, record their
- // addresses now so that we can rewrite them with the correct addresses later
- for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
- MachineRelocation &MR = Relocations[i];
- intptr_t Addr;
- if (MR.isGlobalValue()) {
- EW.AddPendingGlobalSymbol(MR.getGlobalValue());
- } else if (MR.isExternalSymbol()) {
- EW.AddPendingExternalSymbol(MR.getExternalSymbol());
- } else if (MR.isBasicBlock()) {
- Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setConstantVal(ES->SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isConstantPoolIndex()) {
- Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
- MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isJumpTableIndex()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
- MR.setConstantVal(JTSection.SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else {
- llvm_unreachable("Unhandled relocation type");
- }
- ES->addRelocation(MR);
- }
-
- // Clear per-function data structures.
- JTRelocations.clear();
- Relocations.clear();
- CPLocations.clear();
- CPSections.clear();
- JTLocations.clear();
- MBBLocations.clear();
- return false;
-}
-
-/// emitConstantPool - For each constant pool entry, figure out which section
-/// the constant should live in and emit the constant
-void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
- const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
- if (CP.empty()) return;
-
- // TODO: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf constant pools!");
-
- for (unsigned i = 0, e = CP.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = CP[i];
-
- // Record the constant pool location and the section index
- ELFSection &CstPool = EW.getConstantPoolSection(CPE);
- CPLocations.push_back(CstPool.size());
- CPSections.push_back(CstPool.SectionIdx);
-
- if (CPE.isMachineConstantPoolEntry())
- assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
-
- // Emit the constant to constant pool section
- EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
- }
-}
-
-/// emitJumpTables - Emit all the jump tables for a given jump table info
-/// record to the appropriate section.
-void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return;
-
- // FIXME: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf jump tables!");
-
- const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
- unsigned EntrySize = 4; //MJTI->getEntrySize();
-
- // Get the ELF Section to emit the jump table
- ELFSection &JTSection = EW.getJumpTableSection();
-
- // For each JT, record its offset from the start of the section
- for (unsigned i = 0, e = JT.size(); i != e; ++i) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
-
- // Record JT 'i' offset in the JT section
- JTLocations.push_back(JTSection.size());
-
- // Each MBB entry in the Jump table section has a relocation entry
- // against the current text section.
- for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
- unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
- MachineRelocation MR =
- MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
-
- // Add the relocation to the Jump Table section
- JTRelocations.push_back(MR);
-
- // Output placeholder for MBB in the JT section
- for (unsigned s=0; s < EntrySize; ++s)
- JTSection.emitByte(0);
- }
- }
-}
-
-} // end namespace llvm
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
deleted file mode 100644
index 8671c674eecf..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFCODEEMITTER_H
-#define ELFCODEEMITTER_H
-
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include <vector>
-
-namespace llvm {
- class ELFWriter;
- class ELFSection;
-
- /// ELFCodeEmitter - This class is used by the ELFWriter to
- /// emit the code for functions to the ELF file.
- class ELFCodeEmitter : public ObjectCodeEmitter {
- ELFWriter &EW;
-
- /// Target machine description
- TargetMachine &TM;
-
- /// Section containing code for functions
- ELFSection *ES;
-
- /// Relocations - Record relocations needed by the current function
- std::vector<MachineRelocation> Relocations;
-
- /// JTRelocations - Record relocations needed by the jump table section.
- std::vector<MachineRelocation> JTRelocations;
-
- /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
- uintptr_t FnStartOff;
- public:
- explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
-
- /// addRelocation - Register new relocations for this function
- void addRelocation(const MachineRelocation &MR) {
- Relocations.push_back(MR);
- }
-
- /// emitConstantPool - For each constant pool entry, figure out which
- /// section the constant should live in and emit data to it
- void emitConstantPool(MachineConstantPool *MCP);
-
- /// emitJumpTables - Emit all the jump tables for a given jump table
- /// info and record them to the appropriate section.
- void emitJumpTables(MachineJumpTableInfo *MJTI);
-
- void startFunction(MachineFunction &F);
- bool finishFunction(MachineFunction &F);
-
- /// emitLabel - Emits a label
- virtual void emitLabel(MCSymbol *Label) {
- assert(0 && "emitLabel not implemented");
- }
-
- /// getLabelAddress - Return the address of the specified LabelID,
- /// only usable after the LabelID has been emitted.
- virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
- assert(0 && "getLabelAddress not implemented");
- return 0;
- }
-
- virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
-
-}; // end class ELFCodeEmitter
-
-} // end namespace llvm
-
-#endif
-
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
deleted file mode 100644
index f2c218565854..000000000000
--- a/lib/CodeGen/ELFWriter.cpp
+++ /dev/null
@@ -1,1105 +0,0 @@
-//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the target-independent ELF writer. This file writes out
-// the ELF file in the following order:
-//
-// #1. ELF Header
-// #2. '.text' section
-// #3. '.data' section
-// #4. '.bss' section (conceptual position in file)
-// ...
-// #X. '.shstrtab' section
-// #Y. Section Table
-//
-// The entries in the section table are laid out as:
-// #0. Null entry [required]
-// #1. ".text" entry - the program code
-// #2. ".data" entry - global variables with initializers. [ if needed ]
-// #3. ".bss" entry - global variables without initializers. [ if needed ]
-// ...
-// #N. ".shstrtab" entry - String table for the section names.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfwriter"
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-char ELFWriter::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// ELFWriter Implementation
-//===----------------------------------------------------------------------===//
-
-ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
- : MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(),
- &TM.getTargetLowering()->getObjFileLowering())),
- TLOF(TM.getTargetLowering()->getObjFileLowering()),
- is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
- isLittleEndian(TM.getTargetData()->isLittleEndian()),
- ElfHdr(isLittleEndian, is64Bit) {
-
- MAI = TM.getMCAsmInfo();
- TEW = TM.getELFWriterInfo();
-
- // Create the object code emitter object for this target.
- ElfCE = new ELFCodeEmitter(*this);
-
- // Initial number of sections
- NumSections = 0;
-}
-
-ELFWriter::~ELFWriter() {
- delete ElfCE;
- delete &OutContext;
-
- while(!SymbolList.empty()) {
- delete SymbolList.back();
- SymbolList.pop_back();
- }
-
- while(!PrivateSyms.empty()) {
- delete PrivateSyms.back();
- PrivateSyms.pop_back();
- }
-
- while(!SectionList.empty()) {
- delete SectionList.back();
- SectionList.pop_back();
- }
-
- // Release the name mangler object.
- delete Mang; Mang = 0;
-}
-
-// doInitialization - Emit the file header and all of the global variables for
-// the module to the ELF file.
-bool ELFWriter::doInitialization(Module &M) {
- // Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
-
- Mang = new Mangler(OutContext, *TM.getTargetData());
-
- // ELF Header
- // ----------
- // Fields e_shnum and e_shstrndx are only known after all sections have
- // been emitted. Their locations in the output buffer are recorded so
- // they can be patched up later.
- //
- // Note
- // ----
- // The emitWord method behaves differently for ELF32 and ELF64, writing
- // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
-
- ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
- ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
- ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
- ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
-
- ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
- ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
- ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
- ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
-
- ElfHdr.emitWord16(ELF::ET_REL); // e_type
- ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
- ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
- ElfHdr.emitWord(0); // e_entry, no entry point in .o file
- ElfHdr.emitWord(0); // e_phoff, no program header for .o
- ELFHdr_e_shoff_Offset = ElfHdr.size();
- ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
- ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
- ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
- ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
- ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
-
- // e_shentsize = Section header entry size
- ElfHdr.emitWord16(TEW->getSHdrSize());
-
- // e_shnum = # of section header ents
- ELFHdr_e_shnum_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // e_shstrndx = Section # of '.shstrtab'
- ELFHdr_e_shstrndx_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // Add the null section, which is required to be first in the file.
- getNullSection();
-
- // The first entry in the symtab is the null symbol and the second
- // is a local symbol containing the module/file name
- SymbolList.push_back(new ELFSym());
- SymbolList.push_back(ELFSym::getFileSym());
-
- return false;
-}
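For readers unfamiliar with the raw header layout emitted above, here is a minimal standalone sketch of the same relocatable-object header built with the system <elf.h> definitions. It is not part of the deleted writer; it assumes a little-endian x86-64 target and leaves e_shoff, e_shnum and e_shstrndx zeroed so they can be patched once the section table is known, mirroring the placeholder offsets recorded above.

#include <elf.h>
#include <cstring>

// Build an ELF64 header for a relocatable object (.o); the section-table
// fields stay zero until the sections have been laid out.
static Elf64_Ehdr makeRelocatableHeader() {
  Elf64_Ehdr H;
  std::memset(&H, 0, sizeof(H));
  H.e_ident[EI_MAG0] = ELFMAG0;        // 0x7f
  H.e_ident[EI_MAG1] = ELFMAG1;        // 'E'
  H.e_ident[EI_MAG2] = ELFMAG2;        // 'L'
  H.e_ident[EI_MAG3] = ELFMAG3;        // 'F'
  H.e_ident[EI_CLASS] = ELFCLASS64;    // what TEW->getEIClass() supplies
  H.e_ident[EI_DATA] = ELFDATA2LSB;    // what TEW->getEIData() supplies
  H.e_ident[EI_VERSION] = EV_CURRENT;
  H.e_type = ET_REL;                   // relocatable object, no entry point
  H.e_machine = EM_X86_64;             // target-specific, like TEW->getEMachine()
  H.e_version = EV_CURRENT;
  H.e_ehsize = sizeof(Elf64_Ehdr);
  H.e_shentsize = sizeof(Elf64_Shdr);
  // e_shoff, e_shnum and e_shstrndx are patched later, as in doInitialization.
  return H;
}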
-
-// AddPendingGlobalSymbol - Add a global to be processed and add it to
-// the global symbol lookup; use a zero index because the table
-// index will be determined later.
-void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup /* = false */) {
- PendingGlobals.insert(GV);
- if (AddToLookup)
- GblSymLookup[GV] = 0;
-}
-
-// AddPendingExternalSymbol - Add the external symbol to be processed
-// and add it to the external symbol lookup; use a zero index because
-// the symbol table index will be determined later.
-void ELFWriter::AddPendingExternalSymbol(const char *External) {
- PendingExternals.insert(External);
- ExtSymLookup[External] = 0;
-}
-
-ELFSection &ELFWriter::getDataSection() {
- const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
- return getSection(Data->getSectionName(), Data->getType(),
- Data->getFlags(), 4);
-}
-
-ELFSection &ELFWriter::getBSSSection() {
- const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
- return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
-}
-
-// getCtorSection - Get the static constructor section
-ELFSection &ELFWriter::getCtorSection() {
- const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
- return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
-}
-
-// getDtorSection - Get the static destructor section
-ELFSection &ELFWriter::getDtorSection() {
- const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
- return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
-}
-
-// getTextSection - Get the text section for the specified function
-ELFSection &ELFWriter::getTextSection(const Function *F) {
- const MCSectionELF *Text =
- (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
- return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
-}
-
-// getJumpTableSection - Get a read only section for constants when
-// emitting jump tables. TODO: add PIC support
-ELFSection &ELFWriter::getJumpTableSection() {
- const MCSectionELF *JT =
- (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
- return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
- TM.getTargetData()->getPointerABIAlignment());
-}
-
-// getConstantPoolSection - Get a constant pool section based on the machine
-// constant pool entry type and relocation info.
-ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
- SectionKind Kind;
- switch (CPE.getRelocationInfo()) {
- default: llvm_unreachable("Unknown section kind");
- case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
- case 1:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case 0:
- switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
- case 4: Kind = SectionKind::getMergeableConst4(); break;
- case 8: Kind = SectionKind::getMergeableConst8(); break;
- case 16: Kind = SectionKind::getMergeableConst16(); break;
- default: Kind = SectionKind::getMergeableConst(); break;
- }
- }
-
- const MCSectionELF *CPSect =
- (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
- return getSection(CPSect->getSectionName(), CPSect->getType(),
- CPSect->getFlags(), CPE.getAlignment());
-}
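For a typical ELF target the SectionKind chosen above usually resolves to the standard mergeable read-only sections; a rough sketch of that mapping follows, given only for illustration -- the authoritative section name comes from TLOF.getSectionForConstant(), not from this writer, so the names here are an assumption.

#include <cstdint>
#include <string>

// Assumed mapping from the allocation size of a relocation-free constant
// pool entry to the section it usually lands in on ELF targets.
static std::string typicalConstantPoolSection(uint64_t AllocSize) {
  switch (AllocSize) {
  case 4:  return ".rodata.cst4";
  case 8:  return ".rodata.cst8";
  case 16: return ".rodata.cst16";
  default: return ".rodata";   // generic read-only fallback
  }
}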
-
-// getRelocSection - Return the relocation section for section 'S'. The
-// section holds entries with addends (SHT_RELA) when the target writer
-// reports hasRelocationAddend().
-ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
- unsigned SectionType = TEW->hasRelocationAddend() ?
- ELF::SHT_RELA : ELF::SHT_REL;
-
- std::string SectionName(".rel");
- if (TEW->hasRelocationAddend())
- SectionName.append("a");
- SectionName.append(S.getName());
-
- return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
-}
-
-// getGlobalELFVisibility - Returns the ELF specific visibility type
-unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
- switch (GV->getVisibility()) {
- default:
- llvm_unreachable("unknown visibility type");
- case GlobalValue::DefaultVisibility:
- return ELF::STV_DEFAULT;
- case GlobalValue::HiddenVisibility:
- return ELF::STV_HIDDEN;
- case GlobalValue::ProtectedVisibility:
- return ELF::STV_PROTECTED;
- }
- return 0;
-}
-
-// getGlobalELFBinding - Returns the ELF specific binding type
-unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
- if (GV->hasInternalLinkage())
- return ELF::STB_LOCAL;
-
- if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
- return ELF::STB_WEAK;
-
- return ELF::STB_GLOBAL;
-}
-
-// getGlobalELFType - Returns the ELF specific type for a global
-unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
- if (GV->isDeclaration())
- return ELF::STT_NOTYPE;
-
- if (isa<Function>(GV))
- return ELF::STT_FUNC;
-
- return ELF::STT_OBJECT;
-}
-
-// IsELFUndefSym - True if the global value must be marked as a symbol
-// which points to a SHN_UNDEF section. This means that the symbol has
-// no definition in the module.
-static bool IsELFUndefSym(const GlobalValue *GV) {
- return GV->isDeclaration() || (isa<Function>(GV));
-}
-
-// AddToSymbolList - Update the symbol lookup and, if the symbol is
-// private, add it to the PrivateSyms list; otherwise add it to SymbolList.
-void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
- assert(GblSym->isGlobalValue() && "Symbol must be a global value");
-
- const GlobalValue *GV = GblSym->getGlobalValue();
- if (GV->hasPrivateLinkage()) {
- // For private symbols, keep track of the index inside
- // the private list since they will never go to the symbol
- // table and won't be patched up later.
- PrivateSyms.push_back(GblSym);
- GblSymLookup[GV] = PrivateSyms.size()-1;
- } else {
- // Non-private symbols are left with zero indices until
- // they are patched up during symbol table emission
- // (where the indices are created).
- SymbolList.push_back(GblSym);
- GblSymLookup[GV] = 0;
- }
-}
-
-/// HasCommonSymbols - True if this section holds common symbols; this is
-/// indicated in the ELF object file by a symbol with the SHN_COMMON section
-/// header index.
-static bool HasCommonSymbols(const MCSectionELF &S) {
- // FIXME: this is wrong, a common symbol can be in .data for example.
- if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
- return true;
-
- return false;
-}
-
-
-// EmitGlobal - Choose the right section for global and emit it
-void ELFWriter::EmitGlobal(const GlobalValue *GV) {
-
- // Check if the referenced symbol is already emitted
- if (GblSymLookup.find(GV) != GblSymLookup.end())
- return;
-
- // Handle ELF Bind, Visibility and Type for the current symbol
- unsigned SymBind = getGlobalELFBinding(GV);
- unsigned SymType = getGlobalELFType(GV);
- bool IsUndefSym = IsELFUndefSym(GV);
-
- ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
- : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
-
- if (!IsUndefSym) {
- assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-
- // Handle special llvm globals
- if (EmitSpecialLLVMGlobal(GVar))
- return;
-
- // Get the ELF section where this global belongs from TLOF
- const MCSectionELF *S =
- (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
- ELFSection &ES =
- getSection(S->getSectionName(), S->getType(), S->getFlags());
- SectionKind Kind = S->getKind();
-
- // The symbol align should update the section alignment if needed
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPreferredAlignment(GVar);
- unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
- GblSym->Size = Size;
-
- if (HasCommonSymbols(*S)) { // Symbol must go to a common section
- GblSym->SectionIdx = ELF::SHN_COMMON;
-
- // A new linkonce section is created for each global in the
- // common section, the default alignment is 1 and the symbol
- // value contains its alignment.
- ES.Align = 1;
- GblSym->Value = Align;
-
- } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
- GblSym->SectionIdx = ES.SectionIdx;
-
- // Round the size up to the alignment so the next object can
- // start at the right offset in the section.
- if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
- ES.Align = std::max(ES.Align, Align);
-
- // GblSym->Value should contain the virtual offset inside the section.
- // Virtual because the BSS space is not allocated on ELF objects
- GblSym->Value = ES.Size;
- ES.Size += Size;
-
- } else { // The symbol must go to some kind of data section
- GblSym->SectionIdx = ES.SectionIdx;
-
- // GblSym->Value should contain the symbol offset inside the section,
- // and all symbols should start on their required alignment boundary
- ES.Align = std::max(ES.Align, Align);
- ES.emitAlignment(Align);
- GblSym->Value = ES.size();
-
- // Emit the global to the data section 'ES'
- EmitGlobalConstant(GVar->getInitializer(), ES);
- }
- }
-
- AddToSymbolList(GblSym);
-}
-
-void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS) {
-
- // Print the fields in successive locations. Pad to align if needed!
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CVS->getType());
- const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
- uint64_t sizeSoFar = 0;
- for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
- const Constant* field = CVS->getOperand(i);
-
- // Check if padding is needed and insert one or more 0s.
- uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
- uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
- - cvsLayout->getElementOffset(i)) - fieldSize;
- sizeSoFar += fieldSize + padSize;
-
- // Now print the actual field value.
- EmitGlobalConstant(field, GblS);
-
- // Insert padding - this may include padding to increase the size of the
- // current field up to the ABI size (if the struct is not packed) as well
- // as padding to ensure that the next field starts at the right offset.
- GblS.emitZeros(padSize);
- }
- assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
- "Layout of constant struct may be incorrect!");
-}
-
-void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CV->getType());
-
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
- for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
- EmitGlobalConstant(CVA->getOperand(i), GblS);
- return;
- } else if (isa<ConstantAggregateZero>(CV)) {
- GblS.emitZeros(Size);
- return;
- } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
- EmitGlobalConstantStruct(CVS, GblS);
- return;
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- APInt Val = CFP->getValueAPF().bitcastToAPInt();
- if (CFP->getType()->isDoubleTy())
- GblS.emitWord64(Val.getZExtValue());
- else if (CFP->getType()->isFloatTy())
- GblS.emitWord32(Val.getZExtValue());
- else if (CFP->getType()->isX86_FP80Ty()) {
- unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
- TD->getTypeStoreSize(CFP->getType());
- GblS.emitWordFP80(Val.getRawData(), PadSize);
- } else if (CFP->getType()->isPPC_FP128Ty())
- llvm_unreachable("PPC_FP128Ty global emission not implemented");
- return;
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (Size == 1)
- GblS.emitByte(CI->getZExtValue());
- else if (Size == 2)
- GblS.emitWord16(CI->getZExtValue());
- else if (Size == 4)
- GblS.emitWord32(CI->getZExtValue());
- else
- EmitGlobalConstantLargeInt(CI, GblS);
- return;
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- VectorType *PTy = CP->getType();
- for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
- EmitGlobalConstant(CP->getOperand(I), GblS);
- return;
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- // Resolve the constant expression into a (Constant, Offset)
- // pair. If 'Res.first' is a GlobalValue, emit a relocation with
- // the offset 'Res.second'; otherwise emit it as a plain global
- // constant, as is done for non-constant-expression types.
- CstExprResTy Res = ResolveConstantExpr(CE);
- const Constant *Op = Res.first;
-
- if (isa<GlobalValue>(Op))
- EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
- TD->getTypeAllocSize(Op->getType()),
- GblS, Res.second);
- else
- EmitGlobalConstant(Op, GblS);
-
- return;
- } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
- // Fill the data entry with zeros or emit a relocation entry
- if (isa<ConstantPointerNull>(CV))
- GblS.emitZeros(Size);
- else
- EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
- Size, GblS);
- return;
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- // This is a constant address for a global variable or function and
- // therefore must be referenced using a relocation entry.
- EmitGlobalDataRelocation(GV, Size, GblS);
- return;
- }
-
- std::string msg;
- raw_string_ostream ErrorMsg(msg);
- ErrorMsg << "Constant unimp for type: " << *CV->getType();
- report_fatal_error(ErrorMsg.str());
-}
-
-// ResolveConstantExpr - Resolve the constant expression until it stops
-// yielding other constant expressions.
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
- const TargetData *TD = TM.getTargetData();
-
- // Not a nested constant expression; nothing further to resolve.
- if (!isa<ConstantExpr>(CV))
- return std::make_pair(CV, 0);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- switch (CE->getOpcode()) {
- case Instruction::BitCast:
- return ResolveConstantExpr(CE->getOperand(0));
-
- case Instruction::GetElementPtr: {
- const Constant *ptrVal = CE->getOperand(0);
- SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec);
- return std::make_pair(ptrVal, Offset);
- }
- case Instruction::IntToPtr: {
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
- false/*ZExt*/);
- return ResolveConstantExpr(Op);
- }
- case Instruction::PtrToInt: {
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot at least as large as the pointer; only the equal-size
- // case is handled here.
- if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
- return ResolveConstantExpr(Op);
-
- llvm_unreachable("Integer size less than pointer size");
- }
- case Instruction::Add:
- case Instruction::Sub: {
- // Only handle cases where there's a constant expression with GlobalValue
- // as first operand and ConstantInt as second, which are the cases we can
- // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
- // 1) Instruction::Add => (global) + CstInt
- // 2) Instruction::Sub => (global) + -CstInt
- const Constant *Op0 = CE->getOperand(0);
- const Constant *Op1 = CE->getOperand(1);
- assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
-
- CstExprResTy Res = ResolveConstantExpr(Op0);
- assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
-
- const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
- switch (CE->getOpcode()) {
- case Instruction::Add:
- return std::make_pair(Res.first, RHS.getSExtValue());
- case Instruction::Sub:
- return std::make_pair(Res.first, (-RHS).getSExtValue());
- }
- }
- }
-
- report_fatal_error(CE->getOpcodeName() +
- StringRef(": Unsupported ConstantExpr type"));
-
- return std::make_pair(CV, 0); // silence warning
-}
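A short worked example of what the resolution above computes, written as comments since the exact LLVM API calls are outside the scope of this writer; the global name and types are hypothetical.

// Assuming 64-bit pointers, the constant initializer
//   i8* getelementptr ([16 x i8]* @buf, i64 0, i64 5)
// hits the GetElementPtr case above: getIndexedOffset() folds the indices
// to a byte offset of 5, so ResolveConstantExpr returns the pair (@buf, 5).
// EmitGlobalConstant then emits a relocation against @buf with addend 5
// instead of raw bytes.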
-
-void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset) {
- // Create the relocation entry for the global value
- MachineRelocation MR =
- MachineRelocation::getGV(GblS.getCurrentPCOffset(),
- TEW->getAbsoluteLabelMachineRelTy(),
- const_cast<GlobalValue*>(GV),
- Offset);
-
- // Fill the data entry with zeros
- GblS.emitZeros(Size);
-
- // Add the relocation entry for the current data section
- GblS.addRelocation(MR);
-}
-
-void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
- ELFSection &S) {
- const TargetData *TD = TM.getTargetData();
- unsigned BitWidth = CI->getBitWidth();
- assert(isPowerOf2_32(BitWidth) &&
- "Non-power-of-2-sized integers not handled!");
-
- const uint64_t *RawData = CI->getValue().getRawData();
- uint64_t Val = 0;
- for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
- S.emitWord64(Val);
- }
-}
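A minimal standalone restatement of the word-ordering rule used above, assuming a hypothetical output vector instead of an ELFSection; it only illustrates why the big-endian case walks the limbs in reverse.

#include <cstdint>
#include <cstddef>
#include <vector>

// Emit the 64-bit limbs of a wide integer low-limb-first on little-endian
// targets and high-limb-first on big-endian targets, so the bytes in the
// file match the target's in-memory representation.
static void emitLimbs(const std::vector<uint64_t> &Limbs, bool IsBigEndian,
                      std::vector<uint64_t> &Out) {
  for (std::size_t i = 0, e = Limbs.size(); i != e; ++i)
    Out.push_back(IsBigEndian ? Limbs[e - i - 1] : Limbs[i]);
}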
-
-/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
-/// special global used by LLVM. If so, emit it and return true, otherwise
-/// do nothing and return false.
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
- if (GV->getName() == "llvm.used")
- llvm_unreachable("not implemented yet");
-
- // Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (GV->getSection() == "llvm.metadata" ||
- GV->hasAvailableExternallyLinkage())
- return true;
-
- if (!GV->hasAppendingLinkage()) return false;
-
- assert(GV->hasInitializer() && "Not a special LLVM global!");
-
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPointerPrefAlignment();
- if (GV->getName() == "llvm.global_ctors") {
- ELFSection &Ctor = getCtorSection();
- Ctor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Ctor);
- return true;
- }
-
- if (GV->getName() == "llvm.global_dtors") {
- ELFSection &Dtor = getDtorSection();
- Dtor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Dtor);
- return true;
- }
-
- return false;
-}
-
-/// EmitXXStructorList - Emit the ctor or dtor list. This just emits the
-/// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
- // Should be an array of '{ i32, void ()* }' structs. The first value is the
- // init priority, which we ignore.
- if (List->isNullValue()) return;
- const ConstantArray *InitList = cast<ConstantArray>(List);
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (InitList->getOperand(i)->isNullValue())
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
-
- if (CS->getOperand(1)->isNullValue())
- continue;
-
- // Emit the function pointer.
- EmitGlobalConstant(CS->getOperand(1), Xtor);
- }
-}
-
-bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
- // Nothing to do here, this is all done through the ElfCE object above.
- return false;
-}
-
-/// doFinalization - Now that the module has been completely processed, emit
-/// the ELF file to 'O'.
-bool ELFWriter::doFinalization(Module &M) {
- // Emit .data section placeholder
- getDataSection();
-
- // Emit .bss section placeholder
- getBSSSection();
-
- // Build and emit data, bss and "common" sections.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- EmitGlobal(I);
-
- // Emit all pending globals
- for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
- I != E; ++I)
- EmitGlobal(*I);
-
- // Emit all pending externals
- for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
- I != E; ++I)
- SymbolList.push_back(ELFSym::getExtSym(*I));
-
- // Emit a symbol for each section created so far, skipping the null section
- for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- ELFSym *SectionSym = ELFSym::getSectionSym();
- SectionSym->SectionIdx = ES.SectionIdx;
- SymbolList.push_back(SectionSym);
- ES.Sym = SymbolList.back();
- }
-
- // Emit string table
- EmitStringTable(M.getModuleIdentifier());
-
- // Emit the symbol table now, if non-empty.
- EmitSymbolTable();
-
- // Emit the relocation sections.
- EmitRelocations();
-
- // Emit the sections string table.
- EmitSectionTableStringTable();
-
- // Dump the sections and section table to the .o file.
- OutputSectionsAndSectionTable();
-
- return false;
-}
-
-// RelocateField - Patch the relocatable field at offset 'Offset' in 'BO'
-// with 'Value', whose size in bits is given by 'Size'.
-void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
- int64_t Value, unsigned Size) {
- if (Size == 32)
- BO.fixWord32(Value, Offset);
- else if (Size == 64)
- BO.fixWord64(Value, Offset);
- else
- llvm_unreachable("don't know how to patch relocatable field");
-}
-
-/// EmitRelocations - Emit relocations
-void ELFWriter::EmitRelocations() {
-
- // True if the target uses the relocation entry to hold the addend,
- // otherwise the addend is written directly to the relocatable field.
- bool HasRelA = TEW->hasRelocationAddend();
-
- // Create Relocation sections for each section which needs it.
- for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
- ELFSection &S = *SectionList[i];
-
- // This section does not have relocations
- if (!S.hasRelocations()) continue;
- ELFSection &RelSec = getRelocSection(S);
-
- // 'Link' - Section hdr idx of the associated symbol table
- // 'Info' - Section hdr idx of the section to which the relocation applies
- ELFSection &SymTab = getSymbolTableSection();
- RelSec.Link = SymTab.SectionIdx;
- RelSec.Info = S.SectionIdx;
- RelSec.EntSize = TEW->getRelocationEntrySize();
-
- // Get the relocations from Section
- std::vector<MachineRelocation> Relos = S.getRelocations();
- for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
- MRE = Relos.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
-
- // Relocatable field offset from the section start
- unsigned RelOffset = MR.getMachineCodeOffset();
-
- // Symbol index in the symbol table
- unsigned SymIdx = 0;
-
- // Target specific relocation field type and size
- unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
- unsigned RelTySize = TEW->getRelocationTySize(RelType);
- int64_t Addend = 0;
-
- // There are several machine relocations types, and each one of
- // them needs a different approach to retrieve the symbol table index.
- if (MR.isGlobalValue()) {
- const GlobalValue *G = MR.getGlobalValue();
- int64_t GlobalOffset = MR.getConstantVal();
- SymIdx = GblSymLookup[G];
- if (G->hasPrivateLinkage()) {
- // If the target uses a section offset in the relocation:
- // SymIdx + Addend = section sym for global + section offset
- unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
- Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
- } else {
- Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
- }
- } else if (MR.isExternalSymbol()) {
- const char *ExtSym = MR.getExternalSymbol();
- SymIdx = ExtSymLookup[ExtSym];
- Addend = TEW->getDefaultAddendForRelTy(RelType);
- } else {
- // Get the symbol index for the section symbol
- unsigned SectionIdx = MR.getConstantVal();
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
-
- // The symbol offset inside the section
- int64_t SymOffset = (int64_t)MR.getResultPointer();
-
- // For PC-relative relocations where symbols are defined in the same
- // section in which they are referenced, ignore the relocation entry
- // and patch the relocatable field with the symbol offset directly.
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
- int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
- RelocateField(S, RelOffset, Value, RelTySize);
- continue;
- }
-
- Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
- }
-
- // Targets that do not carry the addend in the relocation entry must
- // have it patched into the relocatable field itself; otherwise write
- // zeros to make sure there is no garbage there.
- RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
-
- // Get the relocation entry and emit to the relocation section
- ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
- EmitRelocation(RelSec, Rel, HasRelA);
- }
- }
-}
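The 'Info' word emitted for each relocation entry packs the symbol table index together with the relocation type, which is presumably what Rel.getInfo(is64Bit) computes; the standard packing, shown with the <elf.h> macros for reference:

#include <elf.h>

// 64-bit: high 32 bits hold the symbol index, low 32 bits the type.
static Elf64_Xword packInfo64(unsigned SymIdx, unsigned RelType) {
  return ELF64_R_INFO(SymIdx, RelType);
}

// 32-bit: symbol index shifted left by 8, type in the low byte.
static Elf32_Word packInfo32(unsigned SymIdx, unsigned RelType) {
  return ELF32_R_INFO(SymIdx, RelType);
}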
-
-/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
-void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
- bool HasRelA) {
- RelSec.emitWord(Rel.getOffset());
- RelSec.emitWord(Rel.getInfo(is64Bit));
- if (HasRelA)
- RelSec.emitWord(Rel.getAddend());
-}
-
-/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
-void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
- if (is64Bit) {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- SymbolTable.emitWord64(Sym.Value);
- SymbolTable.emitWord64(Sym.Size);
- } else {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitWord32(Sym.Value);
- SymbolTable.emitWord32(Sym.Size);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- }
-}
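The two emit orders above follow the field order of the standard symbol records; a small sketch against the system <elf.h> types (for reference only, not part of the writer) makes the 64-bit layout explicit.

#include <elf.h>

// Filling Elf64_Sym in declaration order matches the byte order the 64-bit
// branch emits: name, info, other, shndx, value, size. Elf32_Sym declares
// value and size right after the name, which is why the 32-bit branch emits
// them earlier.
static Elf64_Sym makeSym64(Elf64_Word NameIdx, unsigned char Info,
                           unsigned char Other, Elf64_Section SectionIdx,
                           Elf64_Addr Value, Elf64_Xword Size) {
  Elf64_Sym S;
  S.st_name  = NameIdx;
  S.st_info  = Info;
  S.st_other = Other;
  S.st_shndx = SectionIdx;
  S.st_value = Value;
  S.st_size  = Size;
  return S;
}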
-
-/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab'
-/// Section Header Table
-void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
- const ELFSection &SHdr) {
- SHdrTab.emitWord32(SHdr.NameIdx);
- SHdrTab.emitWord32(SHdr.Type);
- if (is64Bit) {
- SHdrTab.emitWord64(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord64(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord64(SHdr.Align);
- SHdrTab.emitWord64(SHdr.EntSize);
- } else {
- SHdrTab.emitWord32(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord32(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord32(SHdr.Align);
- SHdrTab.emitWord32(SHdr.EntSize);
- }
-}
-
-/// EmitStringTable - If the current symbol table is non-empty, emit the string
-/// table for it
-void ELFWriter::EmitStringTable(const std::string &ModuleName) {
- if (!SymbolList.size()) return; // Empty symbol table.
- ELFSection &StrTab = getStringTableSection();
-
- // Emit the required leading null byte of the string table.
- StrTab.emitByte(0);
-
- // Walk on the symbol list and write symbol names into the string table.
- unsigned Index = 1;
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- ELFSym &Sym = *(*I);
-
- std::string Name;
- if (Sym.isGlobalValue()) {
- SmallString<40> NameStr;
- Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
- Name.append(NameStr.begin(), NameStr.end());
- } else if (Sym.isExternalSym())
- Name.append(Sym.getExternalSymbol());
- else if (Sym.isFileType())
- Name.append(ModuleName);
-
- if (Name.empty()) {
- Sym.NameIdx = 0;
- } else {
- Sym.NameIdx = Index;
- StrTab.emitString(Name);
-
- // Keep track of the number of bytes emitted to this section.
- Index += Name.size()+1;
- }
- }
- assert(Index == StrTab.size());
- StrTab.Size = Index;
-}
-
-// SortSymbols - In the symbol table, local symbols must come before
-// all symbols with non-local bindings. The return value is
-// the index of the first non-local symbol.
-unsigned ELFWriter::SortSymbols() {
- unsigned FirstNonLocalSymbol;
- std::vector<ELFSym*> LocalSyms, OtherSyms;
-
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- if ((*I)->isLocalBind())
- LocalSyms.push_back(*I);
- else
- OtherSyms.push_back(*I);
- }
- SymbolList.clear();
- FirstNonLocalSymbol = LocalSyms.size();
-
- for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
- SymbolList.push_back(LocalSyms[i]);
-
- for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
- SymbolList.push_back(*I);
-
- LocalSyms.clear();
- OtherSyms.clear();
-
- return FirstNonLocalSymbol;
-}
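The same reordering can be written with std::stable_partition, which, like the copy loop above, preserves the relative order inside each group; a sketch assuming a symbol type that exposes isLocalBind():

#include <algorithm>
#include <vector>

// Move local-binding symbols to the front and return the index of the first
// non-local symbol, preserving relative order within both groups.
template <typename SymT>
unsigned sortSymbols(std::vector<SymT *> &Syms) {
  typename std::vector<SymT *>::iterator FirstNonLocal =
      std::stable_partition(Syms.begin(), Syms.end(),
                            [](const SymT *S) { return S->isLocalBind(); });
  return static_cast<unsigned>(FirstNonLocal - Syms.begin());
}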
-
-/// EmitSymbolTable - Emit the symbol table itself.
-void ELFWriter::EmitSymbolTable() {
- if (!SymbolList.size()) return; // Empty symbol table.
-
- // Now that we have emitted the string table and know the offset into the
- // string table of each symbol, emit the symbol table itself.
- ELFSection &SymTab = getSymbolTableSection();
- SymTab.Align = TEW->getPrefELFAlignment();
-
- // Section Index of .strtab.
- SymTab.Link = getStringTableSection().SectionIdx;
-
- // Size of each symtab entry.
- SymTab.EntSize = TEW->getSymTabEntrySize();
-
- // Reorder the symbol table with local symbols first!
- unsigned FirstNonLocalSymbol = SortSymbols();
-
- // Emit all the symbols to the symbol table.
- for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
- ELFSym &Sym = *SymbolList[i];
-
- // Emit symbol to the symbol table
- EmitSymbol(SymTab, Sym);
-
- // Record the symbol table index for each symbol
- if (Sym.isGlobalValue())
- GblSymLookup[Sym.getGlobalValue()] = i;
- else if (Sym.isExternalSym())
- ExtSymLookup[Sym.getExternalSymbol()] = i;
-
- // Keep track on the symbol index into the symbol table
- Sym.SymTabIdx = i;
- }
-
- // One greater than the symbol table index of the last local symbol
- SymTab.Info = FirstNonLocalSymbol;
- SymTab.Size = SymTab.size();
-}
-
-/// EmitSectionTableStringTable - This method adds and emits a section for the
-/// ELF Section Table string table: the string table that holds all of the
-/// section names.
-void ELFWriter::EmitSectionTableStringTable() {
- // First step: add the section for the string table to the list of sections:
- ELFSection &SHStrTab = getSectionHeaderStringTableSection();
-
- // Now that we know which section number is the .shstrtab section, update the
- // e_shstrndx entry in the ELF header.
- ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
-
- // Set the NameIdx of each section in the string table and emit the bytes for
- // the string table.
- unsigned Index = 0;
-
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- // Set the index into the table. Note if we have lots of entries with
- // common suffixes, we could memoize them here if we cared.
- S.NameIdx = Index;
- SHStrTab.emitString(S.getName());
-
- // Keep track of the number of bytes emitted to this section.
- Index += S.getName().size()+1;
- }
-
- // Set the size of .shstrtab now that we know what it is.
- assert(Index == SHStrTab.size());
- SHStrTab.Size = Index;
-}
-
-/// OutputSectionsAndSectionTable - Now that we have constructed the file header
-/// and all of the sections, emit these to the ostream destination and emit the
-/// SectionTable.
-void ELFWriter::OutputSectionsAndSectionTable() {
- // Pass #1: Compute the file offset for each section.
- size_t FileOff = ElfHdr.size(); // File header first.
-
- // Compute each section's file offset, honoring its alignment; skip the
- // null section.
- for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- if (!ES.size()) {
- ES.Offset = FileOff;
- continue;
- }
-
- // Update Section size
- if (!ES.Size)
- ES.Size = ES.size();
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (ES.Align)
- FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
-
- ES.Offset = FileOff;
- FileOff += ES.Size;
- }
-
- // Align Section Header.
- unsigned TableAlign = TEW->getPrefELFAlignment();
- FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
-
- // Now that we know where all of the sections will be emitted, set the e_shnum
- // entry in the ELF header.
- ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
-
- // Now that we know the offset in the file of the section table, update the
- // e_shoff address in the ELF header.
- ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
-
- // Now that we know all of the data in the file header, emit it and all of the
- // sections!
- O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
- FileOff = ElfHdr.size();
-
- // Section Header Table blob
- BinaryObject SHdrTable(isLittleEndian, is64Bit);
-
- // Emit all of the sections to the file and build the section header table.
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
- << ", Size: " << S.Size << ", Offset: " << S.Offset
- << ", SectionData Size: " << S.size() << "\n");
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (S.size()) {
- if (S.Align) {
- for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
- }
- O.write((char *)&S.getData()[0], S.Size);
- FileOff += S.Size;
- }
-
- EmitSectionHeader(SHdrTable, S);
- }
-
- // Align output for the section table.
- for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
-
- // Emit the section table itself.
- O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
-}
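The offset rounding used throughout this pass, (Off + Align - 1) & ~(Align - 1), assumes a power-of-two alignment, which is what the sections here use; a tiny self-contained illustration:

#include <cassert>
#include <cstddef>

// Round Off up to the next multiple of Align; Align must be a power of two.
static std::size_t alignTo(std::size_t Off, std::size_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two only");
  return (Off + Align - 1) & ~(Align - 1);
}
// e.g. alignTo(13, 8) == 16, alignTo(16, 8) == 16, alignTo(0, 8) == 0.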
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
deleted file mode 100644
index 6f7fbace8aba..000000000000
--- a/lib/CodeGen/ELFWriter.h
+++ /dev/null
@@ -1,251 +0,0 @@
-//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ELFWriter class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFWRITER_H
-#define ELFWRITER_H
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include <map>
-
-namespace llvm {
- class BinaryObject;
- class Constant;
- class ConstantInt;
- class ConstantStruct;
- class ELFCodeEmitter;
- class ELFRelocation;
- class ELFSection;
- struct ELFSym;
- class GlobalVariable;
- class JITDebugRegisterer;
- class Mangler;
- class MachineCodeEmitter;
- class MachineConstantPoolEntry;
- class ObjectCodeEmitter;
- class MCAsmInfo;
- class TargetELFWriterInfo;
- class TargetLoweringObjectFile;
- class raw_ostream;
- class SectionKind;
- class MCContext;
- class TargetMachine;
-
- typedef std::vector<ELFSym*>::iterator ELFSymIter;
- typedef std::vector<ELFSection*>::iterator ELFSectionIter;
- typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
- typedef SetVector<const char *>::const_iterator PendingExtsIter;
- typedef std::pair<const Constant *, int64_t> CstExprResTy;
-
- /// ELFWriter - This class implements the common target-independent code for
- /// writing ELF files. Targets should derive a class from this to
- /// parameterize the output format.
- ///
- class ELFWriter : public MachineFunctionPass {
- friend class ELFCodeEmitter;
- friend class JITDebugRegisterer;
- public:
- static char ID;
-
- /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
- ObjectCodeEmitter *getObjectCodeEmitter() {
- return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
- }
-
- ELFWriter(raw_ostream &O, TargetMachine &TM);
- ~ELFWriter();
-
- protected:
- /// Output stream to send the resultant object file to.
- raw_ostream &O;
-
- /// Target machine description.
- TargetMachine &TM;
-
- /// Context object for machine code objects.
- MCContext &OutContext;
-
- /// Target Elf Writer description.
- const TargetELFWriterInfo *TEW;
-
- /// Mang - The object used to perform name mangling for this module.
- Mangler *Mang;
-
- /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
- /// code for functions to the .o file.
- ELFCodeEmitter *ElfCE;
-
- /// TLOF - Target Lowering Object File; provides section names for globals
- /// and other object-file-specific information.
- const TargetLoweringObjectFile &TLOF;
-
- /// MAI - Target Asm Info; provides information about section names for
- /// globals and other target-specific details.
- const MCAsmInfo *MAI;
-
- //===------------------------------------------------------------------===//
- // Properties inferred automatically from the target machine.
- //===------------------------------------------------------------------===//
-
- /// is64Bit/isLittleEndian - This information is inferred from the target
- /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
- bool is64Bit, isLittleEndian;
-
- /// doInitialization - Emit the file header and all of the global variables
- /// for the module to the ELF file.
- bool doInitialization(Module &M);
- bool runOnMachineFunction(MachineFunction &MF);
-
- /// doFinalization - Now that the module has been completely processed, emit
- /// the ELF file to 'O'.
- bool doFinalization(Module &M);
-
- private:
- /// Blob containing the Elf header
- BinaryObject ElfHdr;
-
- /// SectionList - This is the list of sections that we have emitted to the
- /// file. Once the file has been completely built, the section header table
- /// is constructed from this info.
- std::vector<ELFSection*> SectionList;
- unsigned NumSections; // Always = SectionList.size()
-
- /// SectionLookup - This is a mapping from section name to the ELFSection
- /// object in SectionList. Used to quickly look up a section by name.
- std::map<std::string, ELFSection*> SectionLookup;
-
- /// PendingGlobals - Globals not processed as symbols yet.
- SetVector<const GlobalValue*> PendingGlobals;
-
- /// GblSymLookup - This is a mapping from global value to a symbol index
- /// in the symbol table or private symbols list. This is useful since reloc
- /// symbol references must be quickly mapped to their indices on the lists.
- std::map<const GlobalValue*, uint32_t> GblSymLookup;
-
- /// PendingExternals - Externals not processed as symbols yet.
- SetVector<const char *> PendingExternals;
-
- /// ExtSymLookup - This is a mapping from externals to a symbol index
- /// in the symbol table list. This is useful since reloc symbol references
- /// must be quickly mapped to their symbol table indices.
- std::map<const char *, uint32_t> ExtSymLookup;
-
- /// SymbolList - This is the list of symbols emitted to the symbol table.
- /// When the SymbolList is finally built, local symbols must be placed at
- /// the beginning and non-local symbols at the end.
- std::vector<ELFSym*> SymbolList;
-
- /// PrivateSyms - Record private symbols; symbols recorded here must never
- /// appear in the SymbolList.
- std::vector<ELFSym*> PrivateSyms;
-
- /// getSection - Return the section with the specified name, creating a new
- /// section if one does not already exist.
- ELFSection &getSection(const std::string &Name, unsigned Type,
- unsigned Flags = 0, unsigned Align = 0) {
- ELFSection *&SN = SectionLookup[Name];
- if (SN) return *SN;
-
- SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
- SN = SectionList.back();
- SN->SectionIdx = NumSections++;
- SN->Type = Type;
- SN->Flags = Flags;
- SN->Link = ELF::SHN_UNDEF;
- SN->Align = Align;
- return *SN;
- }
-
- ELFSection &getNonExecStackSection() {
- return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
- }
-
- ELFSection &getSymbolTableSection() {
- return getSection(".symtab", ELF::SHT_SYMTAB, 0);
- }
-
- ELFSection &getStringTableSection() {
- return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getSectionHeaderStringTableSection() {
- return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getNullSection() {
- return getSection("", ELF::SHT_NULL, 0);
- }
-
- ELFSection &getDataSection();
- ELFSection &getBSSSection();
- ELFSection &getCtorSection();
- ELFSection &getDtorSection();
- ELFSection &getJumpTableSection();
- ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
- ELFSection &getTextSection(const Function *F);
- ELFSection &getRelocSection(ELFSection &S);
-
- // Helpers for obtaining ELF specific info.
- unsigned getGlobalELFBinding(const GlobalValue *GV);
- unsigned getGlobalELFType(const GlobalValue *GV);
- unsigned getGlobalELFVisibility(const GlobalValue *GV);
-
- // AddPendingGlobalSymbol - Add a global to be processed and add it to
- // the global symbol lookup; use a zero index because the table
- // index will be determined later.
- void AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup = false);
-
- // AddPendingExternalSymbol - Add the external symbol to be processed
- // and add it to the external symbol lookup; use a zero index because
- // the symbol table index will be determined later.
- void AddPendingExternalSymbol(const char *External);
-
- // AddToSymbolList - Update the symbol lookup and, if the symbol is
- // private, add it to the PrivateSyms list; otherwise add it to SymbolList.
- void AddToSymbolList(ELFSym *GblSym);
-
- // As we complete the ELF file, we need to update fields in the ELF header
- // (e.g. the location of the section table). These members keep track of
- // the offset in ELFHeader of these various pieces to update and other
- // locations in the file.
- unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
- unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
- unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
-
- private:
- void EmitGlobal(const GlobalValue *GV);
- void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
- void EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS);
- void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
- void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset = 0);
- bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
- void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
- void EmitRelocations();
- void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
- void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
- void EmitSectionTableStringTable();
- void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
- void EmitSymbolTable();
- void EmitStringTable(const std::string &ModuleName);
- void OutputSectionsAndSectionTable();
- void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
- unsigned Size);
- unsigned SortSymbols();
- CstExprResTy ResolveConstantExpr(const Constant *CV);
- };
-}
-
-#endif
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index a7aba89b87f3..3bb04657b58a 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -77,7 +77,7 @@ void EdgeBundles::view() const {
/// Specialize WriteGraph, the standard implementation won't work.
raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
- const std::string &Title) {
+ const Twine &Title) {
const MachineFunction *MF = G.getMachineFunction();
O << "digraph {\n";
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 01dccdb71e4b..a48c5400abcb 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +45,7 @@ using namespace llvm;
/// DomainValue for each register, but it may contain multiple execution
/// domains. A register value is initially created in a single execution
/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact the the register is now available in multiple
+/// keep track of the fact that the register is now available in multiple
/// domains.
namespace {
struct DomainValue {
@@ -57,8 +57,10 @@ struct DomainValue {
// domains where the register is available for free.
unsigned AvailableDomains;
- // Position of the last defining instruction.
- unsigned Dist;
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
// Twiddleable instructions using or defining these registers.
SmallVector<MachineInstr*, 8> Instrs;
@@ -92,16 +94,33 @@ struct DomainValue {
return CountTrailingZeros_32(AvailableDomains);
}
- DomainValue() { clear(); }
+ DomainValue() : Refs(0) { clear(); }
+ // Clear this DomainValue and point to next which has all its data.
void clear() {
- Refs = AvailableDomains = Dist = 0;
+ AvailableDomains = 0;
+ Next = 0;
Instrs.clear();
}
};
}
namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonymous namespace
+
+namespace {
class ExeDepsFix : public MachineFunctionPass {
static char ID;
SpecificBumpPtrAllocator<DomainValue> Allocator;
@@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineBasicBlock *MBB;
std::vector<int> AliasMap;
const unsigned NumRegs;
- DomainValue **LiveRegs;
- typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
LiveOutMap LiveOuts;
- unsigned Distance;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
public:
ExeDepsFix(const TargetRegisterClass *rc)
@@ -131,26 +156,33 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
- return "SSE execution domain fixup";
+ return "Execution dependency fix";
}
private:
// Register mapping.
- int RegIndex(unsigned Reg);
+ int regIndex(unsigned Reg);
// DomainValue allocation.
- DomainValue *Alloc(int domain = -1);
- void Recycle(DomainValue*);
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
// LiveRegs manipulations.
- void SetLiveReg(int rx, DomainValue *DV);
- void Kill(int rx);
- void Force(int rx, unsigned domain);
- void Collapse(DomainValue *dv, unsigned domain);
- bool Merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock();
- void visitGenericInstr(MachineInstr*);
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
};
@@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
-int ExeDepsFix::RegIndex(unsigned Reg) {
+int ExeDepsFix::regIndex(unsigned Reg) {
assert(Reg < AliasMap.size() && "Invalid register");
return AliasMap[Reg];
}
-DomainValue *ExeDepsFix::Alloc(int domain) {
+DomainValue *ExeDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
- dv->Dist = Distance;
if (domain >= 0)
dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
return dv;
}
-void ExeDepsFix::Recycle(DomainValue *dv) {
- assert(dv && "Cannot recycle NULL");
- dv->clear();
- Avail.push_back(dv);
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs) {
- LiveRegs = new DomainValue*[NumRegs];
- std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
- }
+ assert(LiveRegs && "Must enter basic block first.");
- if (LiveRegs[rx] == dv)
+ if (LiveRegs[rx].Value == dv)
return;
- if (LiveRegs[rx]) {
- assert(LiveRegs[rx]->Refs && "Bad refcount");
- if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
- }
- LiveRegs[rx] = dv;
- if (dv) ++dv->Refs;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::Kill(int rx) {
+void ExeDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs || !LiveRegs[rx]) return;
-
- // Before killing the last reference to an open DomainValue, collapse it to
- // the first available domain.
- if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
- Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
- else
- SetLiveReg(rx, 0);
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = 0;
}
/// Force register rx into domain.
-void ExeDepsFix::Force(int rx, unsigned domain) {
+void ExeDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- DomainValue *dv;
- if (LiveRegs && (dv = LiveRegs[rx])) {
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
if (dv->isCollapsed())
dv->addDomain(domain);
else if (dv->hasDomain(domain))
- Collapse(dv, domain);
+ collapse(dv, domain);
else {
// This is an incompatible open DomainValue. Collapse it to whatever and
// force the new value into domain. This costs a domain crossing.
- Collapse(dv, dv->getFirstDomain());
- assert(LiveRegs[rx] && "Not live after collapse?");
- LiveRegs[rx]->addDomain(domain);
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
}
} else {
// Set up basic collapsed DomainValue.
- SetLiveReg(rx, Alloc(domain));
+ setLiveReg(rx, alloc(domain));
}
}
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
// If there are multiple users, give them new, unique DomainValues.
if (LiveRegs && dv->Refs > 1)
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == dv)
- SetLiveReg(rx, Alloc(domain));
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
}
/// Merge - All instructions and registers in B are moved to A, and B is
/// released.
-bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
if (!common)
return false;
A->AvailableDomains = common;
- A->Dist = std::max(A->Dist, B->Dist);
A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+  // All uses of B now refer to A.
+ B->Next = retain(A);
+
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == B)
- SetLiveReg(rx, A);
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
return true;
}
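
merge() can only succeed while both values are still open and share a domain; the available-domain sets are small bitmasks, so the common set is a bitwise AND and an empty result means one of the values has to cross domains. A tiny illustrative sketch (the domain numbering below is made up, not the target's encoding):

    #include <cstdint>
    #include <cstdio>

    // One bit per execution domain. The numbering is invented for this example;
    // the real masks come from the target's getExecutionDomain() encoding.
    enum : uint16_t { IntDom = 1 << 0, SingleDom = 1 << 1, DoubleDom = 1 << 2 };

    // Two open values can merge only if they still share a domain; the merged
    // value is restricted to the intersection of what both could still become.
    static uint16_t commonDomains(uint16_t A, uint16_t B) { return A & B; }

    int main() {
      uint16_t A = SingleDom | DoubleDom; // Still open between the two FP domains.
      uint16_t B = IntDom | SingleDom;    // Still open between integer and single.
      uint16_t C = IntDom;                // Already collapsed to the integer domain.

      if (uint16_t Common = commonDomains(A, B))
        std::printf("A+B merge, common mask %#x\n", (unsigned)Common); // 0x2
      if (!commonDomains(A, C))
        std::printf("A+C cannot merge; one value must cross domains\n");
      return 0;
    }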
-void ExeDepsFix::enterBasicBlock() {
- // Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- int rx = RegIndex(*i);
- if (rx < 0) continue;
- for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
- pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) continue;
- DomainValue *pdv = fi->second[rx];
- if (!pdv) continue;
- if (!LiveRegs || !LiveRegs[rx]) {
- SetLiveReg(rx, pdv);
+ int rx = regIndex(*i);
+ if (rx < 0)
+ continue;
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
continue;
}
// We have a live DomainValue from more than one predecessor.
- if (LiveRegs[rx]->isCollapsed()) {
+ if (LiveRegs[rx].Value->isCollapsed()) {
// We are already collapsed, but predecessor is not. Force him.
- unsigned domain = LiveRegs[rx]->getFirstDomain();
- if (!pdv->isCollapsed() && pdv->hasDomain(domain))
- Collapse(pdv, domain);
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
continue;
}
// Currently open, merge in predecessor.
if (!pdv->isCollapsed())
- Merge(LiveRegs[rx], pdv);
+ merge(LiveRegs[rx].Value, pdv);
else
- Force(rx, pdv->getFirstDomain());
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = 0;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ int rx = regIndex(MO.getReg());
+ if (rx < 0)
+ continue;
+
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
}
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
}
+
+ ++CurInstr;
}
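
The per-function Distance counter is gone; processDefs() instead keeps a per-block instruction counter and records, per register, the counter value at its last def. The "clearance" in front of a partial register update is then just the difference, and the target-preferred clearance decides whether a dependency-breaking instruction is worth it. A standalone sketch of that bookkeeping on a made-up instruction stream (the real pass gets Pref from getPartialRegUpdateClearance and emits the fix via breakPartialRegDependency):

    #include <cstdio>
    #include <vector>

    struct LiveReg { int Def = -(1 << 20); }; // "defined a long time ago"

    // Defs[i] lists the register indices written by instruction number i.
    // Pref is the clearance a partial-register update would like in front of it.
    static void processBlock(const std::vector<std::vector<int> > &Defs,
                             unsigned Pref) {
      std::vector<LiveReg> LiveRegs(8);
      int CurInstr = 0;
      for (const std::vector<int> &Instr : Defs) {
        for (int rx : Instr) {
          unsigned Clearance = CurInstr - LiveRegs[rx].Def;
          LiveRegs[rx].Def = CurInstr;           // rx is (re)defined here.
          if (Pref > Clearance)
            std::printf("instr %d: r%d clearance %u < %u, break dependency\n",
                        CurInstr, rx, Clearance, Pref);
          else
            std::printf("instr %d: r%d clearance %u >= %u, OK\n",
                        CurInstr, rx, Clearance, Pref);
        }
        ++CurInstr;
      }
    }

    int main() {
      // r3 is written at instruction 0 and partially updated again at 2 and 7;
      // the update at 2 is too close and gets a dependency break, the one at 7
      // has enough clearance.
      processBlock({{3}, {1}, {3}, {2}, {0}, {1}, {2}, {3}}, /*Pref=*/4);
      return 0;
    }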
// A hard instruction only works in one domain. All input registers will be
@@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Force(rx, domain);
+ force(rx, domain);
}
// Kill all defs and force them.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Kill(rx);
- Force(rx, domain);
+ kill(rx);
+ force(rx, domain);
}
}
@@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (DomainValue *dv = LiveRegs[rx]) {
+ if (DomainValue *dv = LiveRegs[rx].Value) {
// Bitmask of domains that dv and available have in common.
unsigned common = dv->getCommonDomains(available);
// Is it possible to use this collapsed register for free?
@@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
else
// Open DomainValue is not compatible with instruction. It is useless
// now.
- Kill(rx);
+ kill(rx);
}
}
@@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<DomainValue*,4> doms;
+ SmallVector<LiveReg, 4> Regs;
for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
int rx = *i;
- DomainValue *dv = LiveRegs[rx];
+ const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
- if (!dv->getCommonDomains(available)) {
- Kill(*i);
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
continue;
}
- // sorted, uniqued insert.
- bool inserted = false;
- for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
- i != e && !inserted; ++i) {
- if (dv == *i)
- inserted = true;
- else if (dv->Dist < (*i)->Dist) {
- inserted = true;
- doms.insert(i, dv);
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
}
}
- if (!inserted)
- doms.push_back(dv);
+ if (!Inserted)
+ Regs.push_back(LR);
}
  // Regs is now sorted in order of appearance. Try to merge them all, giving
// priority to the latest ones.
DomainValue *dv = 0;
- while (!doms.empty()) {
+ while (!Regs.empty()) {
if (!dv) {
- dv = doms.pop_back_val();
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *latest = doms.pop_back_val();
- if (Merge(dv, latest)) continue;
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
// If latest didn't merge, it is useless now. Kill all registers using it.
for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
- if (LiveRegs[*i] == latest)
- Kill(*i);
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
}
// dv is the DomainValue we are going to use for this instruction.
- if (!dv)
- dv = Alloc();
- dv->Dist = Distance;
- dv->AvailableDomains = available;
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
dv->Instrs.push_back(mi);
// Finally set all defs and non-collapsed uses to dv.
for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
- Kill(rx);
- SetLiveReg(rx, dv);
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
}
}
}
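
visitSoftInstr() keeps its merge candidates sorted by the Def age recorded in LiveRegs and then consumes them from the back, so the most recently written value becomes the merge target and older values are folded into it or killed. A small standalone sketch of that sorted-insert, latest-first pattern (the Candidate type is invented for the example):

    #include <cstdio>
    #include <vector>

    // A trimmed-down stand-in for the LiveReg entries visitSoftInstr collects:
    // Def is the instruction number of the last write, Name just labels the value.
    struct Candidate { int Def; char Name; };

    int main() {
      const Candidate Input[] = {{7, 'b'}, {2, 'c'}, {9, 'a'}};

      // Keep candidates sorted by Def, oldest first, so pop_back() always yields
      // the most recently defined value.
      std::vector<Candidate> Regs;
      for (const Candidate &C : Input) {
        std::vector<Candidate>::iterator I = Regs.begin();
        while (I != Regs.end() && I->Def < C.Def)
          ++I;
        Regs.insert(I, C);
      }

      // Consume latest-first: the first candidate popped becomes the merge target;
      // later, older candidates are merged into it or killed if incompatible.
      while (!Regs.empty()) {
        Candidate C = Regs.back();
        Regs.pop_back();
        std::printf("def %d (%c)\n", C.Def, C.Name); // prints 9 a, then 7 b, 2 c
      }
      return 0;
    }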
-void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
- // Process explicit defs, kill any relevant registers redefined.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Kill(rx);
- }
-}
-
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- MBB = 0;
LiveRegs = 0;
- Distance = 0;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
anyregs = true;
break;
}
@@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
AliasMap[*AI] = i;
}
MachineBasicBlock *Entry = MF->begin();
- SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
- DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- MBB = *DFI;
- enterBasicBlock();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- MachineInstr *mi = I;
- if (mi->isDebugValue()) continue;
- ++Distance;
- std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
- if (domp.first)
- if (domp.second)
- visitSoftInstr(mi, domp.second);
- else
- visitHardInstr(mi, domp.first);
- else if (LiveRegs)
- visitGenericInstr(mi);
- }
+ ++I)
+ visitInstr(I);
+ leaveBasicBlock(MBB);
+ }
- // Save live registers at end of MBB - used by enterBasicBlock().
- if (LiveRegs)
- LiveOuts.insert(std::make_pair(MBB, LiveRegs));
- LiveRegs = 0;
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
+ leaveBasicBlock(MBB);
}
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
LiveOuts.clear();
Avail.clear();
Allocator.DestroyAll();
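
The driver now visits blocks in reverse post-order, remembers every block that had a predecessor with no recorded live-outs yet (i.e. was reached through a back-edge), and gives just those blocks a cheap second pass once all predecessors have been processed. A standalone sketch of that traversal on a toy CFG (block numbers and edges are invented):

    #include <cstdio>
    #include <vector>

    // Toy CFG: Succ[b] lists the successors of block b, Pred[b] its predecessors.
    static const std::vector<std::vector<int> > Succ = {{1}, {2}, {1, 3}, {}};
    static const std::vector<std::vector<int> > Pred = {{}, {0, 2}, {1}, {2}};

    static void postOrder(int BB, std::vector<bool> &Seen, std::vector<int> &PO) {
      Seen[BB] = true;
      for (int S : Succ[BB])
        if (!Seen[S])
          postOrder(S, Seen, PO);
      PO.push_back(BB);
    }

    int main() {
      std::vector<bool> Seen(Succ.size(), false);
      std::vector<int> PO;
      postOrder(0, Seen, PO);                       // DFS post-order from entry.
      std::vector<int> RPO(PO.rbegin(), PO.rend()); // Reverse post-order: 0 1 2 3.

      std::vector<bool> Processed(Succ.size(), false);
      std::vector<int> Loops; // Blocks that had an unprocessed predecessor.
      for (int BB : RPO) {
        for (int P : Pred[BB])
          if (!Processed[P]) {
            Loops.push_back(BB); // Back-edge from P; its live-outs not known yet.
            break;
          }
        std::printf("first pass: block %d\n", BB);
        Processed[BB] = true;
      }
      // Second, cheaper pass only over the blocks reached through a back-edge.
      for (int BB : Loops)
        std::printf("revisit:    block %d\n", BB);  // prints: block 1
      return 0;
    }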
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index a67140ece4a5..2c4a93543cc3 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -32,10 +32,6 @@ namespace {
private:
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const {
- return "Expand ISel Pseudo-instructions";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -43,12 +39,9 @@ namespace {
} // end anonymous namespace
char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
- "Expand CodeGen Pseudo-instructions", false, false)
-
-FunctionPass *llvm::createExpandISelPseudosPass() {
- return new ExpandISelPseudos();
-}
+ "Expand ISel Pseudo-instructions", false, false)
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.usesCustomInsertionHook()) {
+ if (MI->usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e2a14a8dfd97..b14afc286d49 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -36,10 +36,6 @@ public:
static char ID; // Pass identification, replacement for typeid
ExpandPostRA() : MachineFunctionPass(ID) {}
- const char *getPassName() const {
- return "Post-RA pseudo instruction expansion pass";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
@@ -61,10 +57,10 @@ private:
} // end anonymous namespace
char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
-FunctionPass *llvm::createExpandPostRAPseudosPass() {
- return new ExpandPostRA();
-}
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
/// and the lowered replacement instructions immediately precede it.
@@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
++mi;
// Only expand pseudos.
- if (!MI->getDesc().isPseudo())
+ if (!MI->isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index d757cf409d50..1caf8c233976 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- default: llvm_unreachable("Unknown GC point kind");
case GC::Loop: return "loop";
case GC::Return: return "return";
case GC::PreCall: return "pre-call";
case GC::PostCall: return "post-call";
}
+ llvm_unreachable("Invalid point kind");
}
bool Printer::runOnFunction(Function &F) {
@@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) {
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
RE = FD->roots_end(); RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
- OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::iterator PI = FD->begin(),
PE = FD->end(); PI != PE; ++PI) {
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 766c6ee542a9..506b5cf09457 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -10,8 +10,8 @@
// This file implements target- and collector-independent garbage collection
// infrastructure.
//
-// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
-// are identified in SelectionDAGISel.
+// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
+// Roots are identified in SelectionDAGISel.
//
//===----------------------------------------------------------------------===//
@@ -35,9 +35,9 @@
using namespace llvm;
namespace {
-
+
/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
- /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
@@ -47,47 +47,46 @@ namespace {
bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
static bool InsertRootInitializers(Function &F,
AllocaInst **Roots, unsigned Count);
-
+
public:
static char ID;
-
+
LowerIntrinsics();
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
};
-
-
- /// MachineCodeAnalysis - This is a target-independent pass over the machine
+
+
+ /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
/// function representation to identify safe points for the garbage collector
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
- class MachineCodeAnalysis : public MachineFunctionPass {
+ class GCMachineCodeAnalysis : public MachineFunctionPass {
const TargetMachine *TM;
GCFunctionInfo *FI;
MachineModuleInfo *MMI;
const TargetInstrInfo *TII;
-
+
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator MI);
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
DebugLoc DL) const;
-
+
void FindStackOffsets(MachineFunction &MF);
-
+
public:
static char ID;
-
- MachineCodeAnalysis();
- const char *getPassName() const;
+
+ GCMachineCodeAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool runOnMachineFunction(MachineFunction &MF);
};
-
+
}
// -----------------------------------------------------------------------------
@@ -97,6 +96,7 @@ GCStrategy::GCStrategy() :
CustomReadBarriers(false),
CustomWriteBarriers(false),
CustomRoots(false),
+ CustomSafePoints(false),
InitRoots(true),
UsesMetadata(false)
{}
@@ -104,18 +104,24 @@ GCStrategy::GCStrategy() :
GCStrategy::~GCStrategy() {
for (iterator I = begin(), E = end(); I != E; ++I)
delete *I;
-
+
Functions.clear();
}
-
+
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
-
+
bool GCStrategy::performCustomLowering(Function &F) {
dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable("must override performCustomLowering");
+}
+
+
+bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
+ dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
llvm_unreachable(0);
- return 0;
}
+
GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
Functions.push_back(FI);
@@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
FunctionPass *llvm::createGCLoweringPass() {
return new LowerIntrinsics();
}
-
+
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
@@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics()
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
-
+
void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
@@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration() && I->hasGC())
MI->getFunctionInfo(*I); // Instantiate the GC strategy.
-
+
bool MadeChange = false;
for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
if (NeedsCustomLoweringPass(**I))
if ((*I)->initializeCustomLowering(M))
MadeChange = true;
-
+
return MadeChange;
}
-bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
unsigned Count) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP)) ++IP;
-
+
// Search for initializers in the initial BB.
SmallPtrSet<AllocaInst*,16> InitedRoots;
for (; !CouldBecomeSafePoint(IP); ++IP)
@@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
if (AllocaInst *AI =
dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
InitedRoots.insert(AI);
-
+
// Add root initializers.
bool MadeChange = false;
-
+
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
if (!InitedRoots.count(*I)) {
StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
@@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
SI->insertAfter(*I);
MadeChange = true;
}
-
+
return MadeChange;
}
@@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
// The natural definition of instructions which could introduce safe points
// are:
- //
+ //
// - call, invoke (AfterCall, BeforeCall)
// - phis (Loops)
// - invoke, ret, unwind (Exit)
- //
+ //
  // However, instructions as seemingly innocuous as arithmetic can become
// libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
// it is necessary to take a conservative approach.
-
+
if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
isa<StoreInst>(I) || isa<LoadInst>(I))
return false;
-
+
// llvm.gcroot is safe because it doesn't do anything at runtime.
if (CallInst *CI = dyn_cast<CallInst>(I))
if (Function *F = CI->getCalledFunction())
if (unsigned IID = F->getIntrinsicID())
if (IID == Intrinsic::gcroot)
return false;
-
+
return true;
}
@@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
// Quick exit for functions that do not use GC.
if (!F.hasGC())
return false;
-
+
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
-
+
bool MadeChange = false;
-
+
if (NeedsDefaultLoweringPass(S))
MadeChange |= PerformDefaultLowering(F, S);
-
+
bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
if (UseCustomLoweringPass)
MadeChange |= S.performCustomLowering(F);
@@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
bool LowerWr = !S.customWriteBarrier();
bool LowerRd = !S.customReadBarrier();
bool InitRoots = S.initializeRoots();
-
+
SmallVector<AllocaInst*, 32> Roots;
-
+
bool MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
default:
continue;
}
-
+
MadeChange = true;
}
}
}
-
+
if (Roots.size())
MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
-
+
return MadeChange;
}
// -----------------------------------------------------------------------------
-FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
- return new MachineCodeAnalysis();
-}
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
-char MachineCodeAnalysis::ID = 0;
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
-MachineCodeAnalysis::MachineCodeAnalysis()
+GCMachineCodeAnalysis::GCMachineCodeAnalysis()
: MachineFunctionPass(ID) {}
-const char *MachineCodeAnalysis::getPassName() const {
- return "Analyze Machine Code For Garbage Collection";
-}
-
-void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
AU.addRequired<GCModuleInfo>();
}
-MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
}
-void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
// Find the return address (next instruction), too, so as to bracket the call
// instruction.
- MachineBasicBlock::iterator RAI = CI;
- ++RAI;
-
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
}
-
+
if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
}
}
-void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineFunction::iterator BBI = MF.begin(),
BBE = MF.end(); BBI != BBE; ++BBI)
for (MachineBasicBlock::iterator MI = BBI->begin(),
ME = BBI->end(); MI != ME; ++MI)
- if (MI->getDesc().isCall())
+ if (MI->isCall())
VisitCallPoint(MI);
}
-void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
const TargetFrameLowering *TFI = TM->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
-
+
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
RE = FI->roots_end(); RI != RE; ++RI)
RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
}
-bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Quick exit for functions that do not use GC.
if (!MF.getFunction()->hasGC())
return false;
-
+
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
if (!FI->getStrategy().needsSafePoints())
return false;
-
+
TM = &MF.getTarget();
MMI = &getAnalysis<MachineModuleInfo>();
TII = TM->getInstrInfo();
-
+
// Find the size of the stack frame.
FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
+
// Find all safe points.
- FindSafePoints(MF);
-
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
// Find the stack offsets for all roots.
FindStackOffsets(MF);
-
+
return false;
}
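
The new CustomSafePoints flag follows the same shape as the existing custom-lowering hooks: the strategy advertises whether it wants to place safe points itself, and the analysis either calls the virtual override or falls back to the default call-site labelling. A minimal sketch of that dispatch pattern (class and method names here are illustrative, not the GCStrategy API):

    #include <cstdio>

    class Strategy {
      bool UsesCustomSafePoints;
    public:
      explicit Strategy(bool Custom) : UsesCustomSafePoints(Custom) {}
      virtual ~Strategy() {}

      bool customSafePoints() const { return UsesCustomSafePoints; }

      // Overridden only by strategies constructed with Custom == true.
      virtual void findCustomSafePoints() {
        std::fprintf(stderr, "strategy must override findCustomSafePoints\n");
      }
    };

    class MyStrategy : public Strategy {
    public:
      MyStrategy() : Strategy(/*Custom=*/true) {}
      void findCustomSafePoints() override {
        std::printf("strategy-specific safe point placement\n");
      }
    };

    static void defaultFindSafePoints() {
      std::printf("default: label every call site\n");
    }

    static void runAnalysis(Strategy &S) {
      // Mirrors the dispatch at the end of runOnMachineFunction() above.
      if (S.customSafePoints())
        S.findCustomSafePoints();
      else
        defaultFindSafePoints();
    }

    int main() {
      Strategy Default(/*Custom=*/false);
      MyStrategy Custom;
      runAnalysis(Default); // default: label every call site
      runAnalysis(Custom);  // strategy-specific safe point placement
      return 0;
    }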
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ce7ed293daac..75ae5b9c2c27 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
namespace {
class IfConverter : public MachineFunctionPass {
@@ -169,7 +170,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "If Converter"; }
private:
bool ReverseBranchCondition(BBInfo &BBI);
@@ -195,7 +195,8 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs);
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs = 0);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> &Redefs,
@@ -251,12 +252,12 @@ namespace {
char IfConverter::ID = 0;
}
+char &llvm::IfConverterID = IfConverter::ID;
+
INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
-FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
-
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
@@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool RetVal = false;
switch (Kind) {
- default: assert(false && "Unexpected!");
- break;
+ default: llvm_unreachable("Unexpected!");
case ICSimple:
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
@@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// blocks, move the end iterators up past any branch instructions.
while (TIE != TIB) {
--TIE;
- if (!TIE->getDesc().isBranch())
+ if (!TIE->isBranch())
break;
}
while (FIE != FIB) {
--FIE;
- if (!FIE->getDesc().isBranch())
+ if (!FIE->isBranch())
break;
}
@@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Redefs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Redefs.insert(*Subreg);
}
@@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Defs.push_back(Reg);
else if (MO.isKill()) {
Redefs.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.erase(*SR);
}
}
@@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
true/*IsImp*/,false/*IsKill*/));
} else {
Redefs.insert(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.insert(*SR);
}
}
@@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICSimpleFalse)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
  // Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
@@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
if (ReverseBranchCondition(*CvtBBI)) {
@@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
}
@@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
@@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
- // Predicate the 'true' block after removing its branch.
+ // Remove branch from 'true' block and remove duplicated instructions.
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
DI1 = BBI1->BB->end();
for (unsigned i = 0; i != NumDups2; ) {
@@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
++i;
}
BBI1->BB->erase(DI1, BBI1->BB->end());
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
- // Predicate the 'false' block.
+ // Remove 'false' block branch and find the last instruction to predicate.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
@@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
if (!DI2->isDebugValue())
--NumDups2;
}
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+          // These are defined before control flow reaches the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ ExtUses.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ ExtUses.insert(*SR);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ RedefsByFalse.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ RedefsByFalse.insert(*SR);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+ // Predicate the 'false' block.
PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
// Merge the true block into the entry of the diamond.
@@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// fold the tail block in as well. Otherwise, unless it falls through to the
  // tail, add an unconditional branch to it.
if (TailBB) {
- BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
@@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return true;
}
+static bool MaySpeculate(const MachineInstr *MI,
+ SmallSet<unsigned, 4> &LaterRedefs,
+ const TargetInstrInfo *TII) {
+ bool SawStore = true;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
/// PredicateBlock - Predicate instructions from the start of the block to the
/// specified end with the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs) {
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != 0;
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
if (I->isDebugValue() || TII->isPredicated(I))
continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
@@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
BBI.NonPredSize = 0;
++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
}
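
The unpredication added above boils down to a per-instruction test: an instruction on the 'true' side of the diamond may stay unpredicated if it is otherwise safe to speculate and every register it defines is redefined by the 'false' side before anything else reads it; once any instruction has to be predicated, everything after it is predicated too. A small standalone sketch of the core check (the instruction model is invented; the real pass also consults isSafeToMove and isProfitableToUnpredicate):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Instr {
      const char *Text;
      std::vector<unsigned> Defs; // Registers written by this instruction.
      bool HasSideEffects;        // Stores, calls, flag writes, ...
    };

    // An instruction may be left unpredicated when it is otherwise safe to
    // speculate and all of its defs are redefined later by the false block.
    static bool maySpeculate(const Instr &I,
                             const std::set<unsigned> &LaterRedefs) {
      if (I.HasSideEffects)
        return false;
      for (unsigned Reg : I.Defs)
        if (!LaterRedefs.count(Reg))
          return false;
      return true;
    }

    int main() {
      // Registers the 'false' block will redefine before they are read again.
      std::set<unsigned> RedefsByFalse = {0}; // r0

      std::vector<Instr> TrueBlock = {
        {"sub r0, r1, #1", {0}, false}, // r0 redefined by the false side: skip
        {"add r2, r1, #1", {2}, false}, // r2 escapes the diamond: predicate
        {"str r0, [r3]",   {},  true},  // side effect: predicate
      };

      for (const Instr &I : TrueBlock)
        std::printf("%-16s -> %s\n", I.Text,
                    maySpeculate(I, RedefsByFalse) ? "leave unpredicated"
                                                   : "predicate");
      return 0;
    }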
/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
@@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && MCID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 726af4696578..d5ea666e4a17 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,14 +14,15 @@
#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -173,8 +174,7 @@ private:
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
- bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
MachineInstr *LoadMI = 0);
void insertReload(LiveInterval &NewLI, SlotIndex,
MachineBasicBlock::iterator MI);
@@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex());
+ LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true));
assert(SrcLR && "Copy from non-existing value");
// Check if this COPY kills its source.
SVI->second.KillsSource = (SrcLR->end == VNI->def);
@@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() {
if (VNI->isUnused())
continue;
MachineInstr *DefMI = 0;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
// Check possible sibling copies.
- if (VNI->isPHIDef() || VNI->getCopy()) {
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
assert(OrigVNI && "Def outside original live range");
if (OrigVNI->def != VNI->def)
DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
}
- if (!DefMI && !VNI->isPHIDef())
- DefMI = LIS.getInstructionFromIndex(VNI->def);
- if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
<< VNI->def << " may remat from " << *DefMI);
}
@@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() {
/// a spill at a better location.
bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
- VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
- assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
SibValueMap::iterator I = SibValues.find(VNI);
if (I == SibValues.end())
return false;
@@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
MRI.getRegClass(SVI.SpillReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(MII);
- VRM.addSpillSlotUse(StackSlot, MII);
DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
++NumSpills;
@@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
MachineInstr *MI = UI.skipInstruction();) {
- if (!MI->isCopy() && !MI->getDesc().mayStore())
+ if (!MI->isCopy() && !MI->mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
if (isSibling(DstReg)) {
LiveInterval &DstLI = LIS.getInterval(DstReg);
- VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
assert(DstVNI && "Missing defined value");
- assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
WorkList.push_back(std::make_pair(&DstLI, DstVNI));
}
continue;
@@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
if (PVNI)
WorkList.push_back(std::make_pair(LI, PVNI));
}
@@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
continue;
LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
- VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
assert(SnipVNI && "Snippet undefined before copy");
WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
} while (!WorkList.empty());
@@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
- SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
if (!ParentVNI) {
@@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
if (SibI != SibValues.end())
RM.OrigMI = SibI->second.DefMI;
- if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
@@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
- if (Writes) {
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
- markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
- return false;
- }
- }
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
}
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->getDesc().canFoldAsLoad() &&
- foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
return true;
}
  // Allocate a new register for the remat.
- LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Original);
NewLI.markNotSpillable();
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
- LIS, TII, TRI);
+ TRI);
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewLI.reg);
MO.setIsKill();
@@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
++NumRemats;
return true;
@@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
// analyzeSiblingValues has already tested all relevant defining instructions.
- if (!Edit->anyRematerializable(LIS, TII, AA))
+ if (!Edit->anyRematerializable(AA))
return;
UsedValues.clear();
@@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::use_nodbg_iterator
RI = MRI.use_nodbg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();)
+ MachineInstr *MI = RI.skipBundle();)
anyRemat |= reMaterializeFor(LI, MI);
}
if (!anyRemat)
@@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// Get rid of deleted and empty intervals.
for (unsigned i = RegsToSpill.size(); i != 0; --i) {
@@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
if (!LI.empty())
continue;
- Edit->eraseVirtReg(Reg, LIS);
+ Edit->eraseVirtReg(Reg);
RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
}
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
@@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// @param MI Instruction using or defining the current register.
-/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
-/// @return True on success, and MI will be erased.
-bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) {
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned Idx = Ops[i];
+ unsigned Idx = Ops[i].second;
MachineOperand &MO = MI->getOperand(Idx);
- if (MO.isImplicit())
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
continue;
+ }
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
@@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (!FoldMI)
return false;
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
- if (!LoadMI)
- VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
- DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
if (!WasCopy)
++NumFolded;
- else if (Ops.front() == 0)
+ else if (Ops.front().second == 0)
++NumSpills;
else
++NumReloads;
@@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
- SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
- LIS.getVNInfoAllocator());
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
++NumReloads;
}
@@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to store instruction.
- SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
++NumSpills;
}
@@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();) {
+ for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RegI.skipBundle();) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
@@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
continue;
// Analyze instruction.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
- if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
@@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
SnippetCopies.insert(MI);
continue;
}
- if (Writes) {
+ if (RI.Writes) {
// Hoist the spill of a sib-reg copy.
if (hoistSpill(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
@@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
}
// Attempt to fold memory ops.
- if (foldMemoryOperand(MI, Ops))
+ if (foldMemoryOperand(Ops))
continue;
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Reg);
NewLI.markNotSpillable();
- if (Reads)
+ if (RI.Reads)
insertReload(NewLI, Idx, MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
MO.setReg(NewLI.reg);
if (MO.isUse()) {
- if (!MI->isRegTiedToDefOperand(Ops[i]))
+ if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
MO.setIsKill();
} else {
if (!MO.isDead())
@@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
// FIXME: Use a second vreg if instruction has no tied ops.
- if (Writes) {
- if (hasLiveDef)
- insertSpill(NewLI, OldLI, Idx, MI);
- else {
- // This instruction defines a dead value. We don't need to spill it,
- // but do create a live range for the dead value.
- VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI));
- }
+ if (RI.Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
+ }
}
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
@@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() {
if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
StackSlot = VRM.assignVirt2StackSlot(Original);
StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
- StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
} else
StackInt = &LSS.getInterval(StackSlot);
@@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
}
// Finally delete the SnippetCopies.
@@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() {
MachineInstr *MI = RI.skipInstruction();) {
assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
- VRM.RemoveMachineInstrFromMaps(MI);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
}
@@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() {
// Delete all spilled registers.
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- Edit->eraseVirtReg(RegsToSpill[i], LIS);
+ Edit->eraseVirtReg(RegsToSpill[i]);
}
void InlineSpiller::spill(LiveRangeEdit &edit) {
@@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
if (!RegsToSpill.empty())
spillAll();
- Edit->calculateRegClassAndHint(MF, LIS, Loops);
+ Edit->calculateRegClassAndHint(MF, Loops);
}
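
The InlineSpiller hunks above, like the LiveDebugVariables, LiveInterval and LiveIntervalAnalysis hunks further down, all apply the same SlotIndex renaming: the old use/def/store slots become register and dead slots. A minimal side-by-side sketch of the mapping as it appears in this patch, where Idx is any instruction's SlotIndex and the commented spellings are the pre-rename accessors, shown only for comparison:

  SlotIndex Use  = Idx.getRegSlot(true);   // previously Idx.getUseIndex()
  SlotIndex Def  = Idx.getRegSlot();       // previously Idx.getDefIndex()
  SlotIndex Dead = Idx.getDeadSlot();      // previously Idx.getStoreIndex()

Early-clobber defs live at the use slot, which is why several hunks now pass MO.isEarlyClobber() straight into getRegSlot() instead of special-casing it.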
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 29b47bd67ece..8368b58880a3 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;
@@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
void InterferenceCache::init(MachineFunction *mf,
LiveIntervalUnion *liuarray,
SlotIndexes *indexes,
+ LiveIntervals *lis,
const TargetRegisterInfo *tri) {
MF = mf;
LIUArray = liuarray;
TRI = tri;
PhysRegEntries.assign(TRI->getNumRegs(), 0);
for (unsigned i = 0; i != CacheEntries; ++i)
- Entries[i].clear(mf, indexes);
+ Entries[i].clear(mf, indexes, lis);
}
InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
@@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
PhysReg = physReg;
Blocks.resize(MF->getNumBlockIDs());
Aliases.clear();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
LiveIntervalUnion *LIU = LIUArray + *AS;
Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
}
@@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
unsigned i = 0, e = Aliases.size();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
LiveIntervalUnion *LIU = LIUArray + *AS;
if (i == e || Aliases[i].first != LIU)
return false;
@@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
for (;;) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
@@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = StartI;
}
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
PrevPos = Stop;
if (BI->First.isValid())
break;
@@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
if (Backup)
++I;
}
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
}
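
The new loops above consult the register-mask slots that LiveIntervals now records per block. A register mask operand is a bit vector with one bit per physical register; a set bit means the register is preserved across the call, so MachineOperand::clobbersPhysReg() is the inverted bit test. A minimal sketch of that test, assuming the usual 32-bit packing (it mirrors the upstream helper rather than replacing it):

  // True when the call carrying RegMask clobbers PhysReg.
  static bool clobbersPhysRegSketch(const uint32_t *RegMask, unsigned PhysReg) {
    // A set bit means "preserved", so a clear bit means "clobbered".
    return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
  }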
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 4df0a9e5c393..485a325aa146 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -18,10 +18,11 @@
namespace llvm {
+class LiveIntervals;
+
class InterferenceCache {
const TargetRegisterInfo *TRI;
LiveIntervalUnion *LIUArray;
- SlotIndexes *Indexes;
MachineFunction *MF;
/// BlockInterference - information about the interference in a single basic
@@ -52,6 +53,9 @@ class InterferenceCache {
/// Indexes - Mapping block numbers to SlotIndex ranges.
SlotIndexes *Indexes;
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
@@ -71,13 +75,14 @@ class InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
- void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
PhysReg = 0;
MF = mf;
Indexes = indexes;
+ LIS = lis;
}
unsigned getPhysReg() const { return PhysReg; }
@@ -124,10 +129,10 @@ class InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
/// init - Prepare cache for a new function.
- void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
const TargetRegisterInfo *);
/// getMaxCursors - Return the maximum number of concurrent cursors that can
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 0f92c2d06bdd..a9ca42f69b97 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
- case Intrinsic::eh_exception:
- case Intrinsic::eh_selector:
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
case Intrinsic::eh_typeid_for:
// Return something different to eh_selector.
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp
new file mode 100644
index 000000000000..96a53892f6d3
--- /dev/null
+++ b/lib/CodeGen/JITCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+using namespace llvm;
+
+void JITCodeEmitter::anchor() { }
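
This new file exists only to give JITCodeEmitter one out-of-line virtual member, so its vtable and type info are emitted in a single object file instead of weakly in every translation unit that includes the header. The same idiom, sketched on a hypothetical class (the Widget names are illustrative, not part of the patch):

  // Widget.h
  class Widget {
    virtual void anchor();   // declared but never called
  public:
    virtual ~Widget() {}
  };

  // Widget.cpp -- the one translation unit that owns the vtable.
  void Widget::anchor() {}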
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
new file mode 100644
index 000000000000..fee0347ea659
--- /dev/null
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmPrinter SelectionDAG
+
+[component_0]
+type = Library
+name = CodeGen
+parent = Libraries
+required_libraries = Analysis Core MC Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 187147a3e252..a1f479a4275f 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,82 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
- bool EnableFastISel;
-}
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
- cl::desc("Disable Post Regalloc"));
-static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
- cl::desc("Disable branch folding"));
-static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
- cl::desc("Disable tail duplication"));
-static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
- cl::desc("Disable pre-register allocation tail duplication"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
-static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
- cl::desc("Disable Stack Slot Coloring"));
-static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
- cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
- cl::desc("Disable Machine Common Subexpression Elimination"));
-static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
- cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
- cl::desc("Disable Machine Sinking"));
-static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
- cl::desc("Disable Loop Strength Reduction Pass"));
-static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
- cl::desc("Disable Codegen Prepare"));
-static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
- cl::desc("Print LLVM IR produced by the loop-reduce pass"));
-static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
- cl::desc("Print LLVM IR input to isel pass"));
-static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
- cl::desc("Dump garbage collector data"));
static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
cl::desc("Show encoding in .s output"));
static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
cl::desc("Show instruction structure in .s output"));
-static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
- cl::desc("Enable MC API logging"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@@ -94,25 +54,20 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
static bool getVerboseAsm() {
switch (AsmVerbose) {
- default:
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
}
+ llvm_unreachable("Invalid verbose asm state");
}
-// Enable or disable FastISel. Both options are needed, because
-// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -O0.
-static cl::opt<cl::boolOrDefault>
-EnableFastISelOption("fast-isel", cl::Hidden,
- cl::desc("Enable the \"fast\" instruction selector"));
-
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, Triple, CPU, FS) {
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM);
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
AsmInfo = T.createMCAsmInfo(Triple);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
@@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+static void addPassesToHandleExceptions(TargetMachine *TM,
+ PassManagerBase &PM) {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify) {
+ // Targets may override createPassConfig to provide a target-specific subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ addPassesToHandleExceptions(TM, PM);
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- assert(Context != 0 && "Failed to get MCContext");
if (hasMCSaveTempLabels())
Context->setAllowTemporaryLabels(false);
@@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
OwningPtr<MCStreamer> AsmStreamer;
switch (FileType) {
- default: return true;
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
@@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
getVerboseAsm(),
hasMCUseLoc(),
hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
InstPrinter,
MCE, MAB,
ShowMCInst);
@@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
break;
}
- if (EnableMCLogging)
- AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
-
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
@@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Ctx = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- addCodeEmitter(PM, OptLevel, JCE);
+ addCodeEmitter(PM, JCE);
PM.add(createGCInfoDeleter());
return false; // success!
@@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
MCContext *&Ctx,
raw_ostream &Out,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Ctx)
return true;
if (hasMCSaveTempLabels())
@@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI,
+ *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
@@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
return false; // success!
}
-
-static void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-}
-
-static void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(Banner));
-}
-
-/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
-/// emitting to assembly files or machine code output.
-///
-bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify,
- MCContext *&OutContext) {
- // Standard LLVM-Level Passes.
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
-
- // Before running any passes, run the verifier to determine if the input
- // coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None && !DisableLSR) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
- if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
- }
-
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
- // Turn exception handling constructs into something the code generators can
- // handle.
- switch (getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::SjLj:
- // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
- // catch info can get misplaced when a selector ends up more than one block
- // removed from the parent invoke(s). This could happen when a landing
- // pad is shared by multiple invokes and is also a target of a normal
- // edge from elsewhere.
- PM.add(createSjLjEHPass(getTargetLowering()));
- // FALLTHROUGH
- case ExceptionHandling::DwarfCFI:
- case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
- PM.add(createDwarfEHPass(this));
- break;
- case ExceptionHandling::None:
- PM.add(createLowerInvokePass(getTargetLowering()));
-
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
- break;
- }
-
- if (OptLevel != CodeGenOpt::None && !DisableCGP)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
-
- PM.add(createStackProtectorPass(getTargetLowering()));
-
- addPreISel(PM, OptLevel);
-
- if (PrintISelInput)
- PM.add(createPrintFunctionPass("\n\n"
- "*** Final LLVM Code input to ISel ***\n",
- &dbgs()));
-
- // All passes which modify the LLVM IR are now complete; run the verifier
- // to ensure that the IR is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Standard Lower-Level Passes.
-
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getRegisterInfo(),
- &getTargetLowering()->getObjFileLowering());
- PM.add(MMI);
- OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
-
- // Enable FastISel with -fast, but allow that to be overridden.
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
- EnableFastISel = true;
-
- // Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
- return true;
-
- // Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- PM.add(createExpandISelPseudosPass());
-
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
- // Optimize PHIs before DCE: removing dead PHI cycles may make more
- // instructions dead.
- if (OptLevel != CodeGenOpt::None)
- PM.add(createOptimizePHIsPass());
-
- // If the target requests it, assign local variables to stack slots relative
- // to one another and simplify frame index references where possible.
- PM.add(createLocalStackSlotAllocationPass());
-
- if (OptLevel != CodeGenOpt::None) {
- // With optimization, dead code should already be eliminated. However
- // there is one known exception: lowered code for arguments that are only
- // used by tail calls, where the tail calls reuse the incoming stack
- // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- if (!DisableMachineDCE)
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass");
-
- if (!DisableMachineLICM)
- PM.add(createMachineLICMPass());
- if (!DisableMachineCSE)
- PM.add(createMachineCSEPass());
- if (!DisableMachineSink)
- PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
-
- PM.add(createPeepholeOptimizerPass());
- printAndVerify(PM, "After codegen peephole optimization pass");
- }
-
- // Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes");
-
- // Perform register allocation.
- PM.add(createRegisterAllocator(OptLevel));
- printAndVerify(PM, "After Register Allocation");
-
- // Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- if (!DisableSSC)
- PM.add(createStackSlotColoringPass(false));
-
- // Run post-ra machine LICM to hoist reloads / remats.
- if (!DisablePostRAMachineLICM)
- PM.add(createMachineLICMPass(false));
-
- printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
- }
-
- // Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PostRegAlloc passes");
-
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
- // Insert prolog/epilog code. Eliminate abstract frame index references...
- PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM, "After PrologEpilogCodeInserter");
-
- // Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
- PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
- // Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
- PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printNoVerify(PM, "After BranchFolding");
- }
-
- // Tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
- PM.add(createTailDuplicatePass(false));
- printNoVerify(PM, "After TailDuplicate");
- }
-
- PM.add(createGCMachineCodeAnalysisPass());
-
- if (PrintGCInfo)
- PM.add(createGCInfoPrinter(dbgs()));
-
- if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
- PM.add(createCodePlacementOptPass());
- printNoVerify(PM, "After CodePlacementOpt");
- }
-
- if (addPreEmitPass(PM, OptLevel))
- printNoVerify(PM, "After PreEmit passes");
-
- return false;
-}
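
With addCommonCodeGenPasses gone, the optimization level is carried by the TargetMachine itself and the emit entry points lose their CodeGenOpt::Level parameter. A sketch of the caller side after this change; TM, Out (a formatted_raw_ostream) and the Module M are assumed to be set up elsewhere:

  PassManager PM;
  if (TM->addPassesToEmitFile(PM, Out, TargetMachine::CGFT_ObjectFile,
                              /*DisableVerify=*/true))
    report_fatal_error("target does not support object file emission");
  PM.run(M);   // runs the IR passes, isel, and the machine pass pipeline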
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0eb009ddac29..deab05a412c9 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
- return LHSNum < RHSNum;
+ return RHSNum < LHSNum;
}
@@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) {
}
-// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// scheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor. If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
-void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index a12e1a36d113..f1abcbb1dd5c 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
return Result;
}
+void LexicalScope::anchor() { }
+
/// dump - Print data structures.
void LexicalScope::dump() const {
#ifndef NDEBUG
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 3dfe4c0e8cfa..2187833031ee 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -226,7 +226,7 @@ public:
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ UserValueScopes &UVS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
// DBG_VALUE has no slot index, use the previous instruction instead.
SlotIndex Idx = MBBI == MBB->begin() ?
LIS->getMBBStartIdx(MBB) :
- LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
// Handle consecutive DBG_VALUE instructions with the same slot index.
do {
if (handleDebugValue(MBBI, Idx)) {
@@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<SlotIndex, 16> Todo;
Todo.push_back(Idx);
do {
@@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
// Is LocNo extended to reach this copy? If not, another def may be blocking
// it, or we are looking at a wrong value of LI.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
if (!I.valid() || I.value() != LocNo)
continue;
if (!LIS.hasInterval(DstReg))
continue;
LiveInterval *DstLI = &LIS.getInterval(DstReg);
- const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
- assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
CopyValues.push_back(std::make_pair(DstLI, DstVNI));
}
@@ -620,7 +620,7 @@ void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -841,7 +841,7 @@ bool
UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
- // safely erase unuused locations.
+ // safely erase unused locations.
for (unsigned i = locations.size(); i ; --i) {
unsigned LocNo = i-1;
const MachineOperand *Loc = &locations[LocNo];
@@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
// index is no longer available. That means the user value is in a
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
- } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
- VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
// FIXME: Translate SubIdx to a stackslot offset.
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
} else {
@@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
}
// Don't insert anything after the first terminator, though.
- return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
- llvm::next(MachineBasicBlock::iterator(MI));
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
}
DebugLoc UserValue::findDebugLoc() {
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index b69945aea98f..ac18843ac30d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other,
for (unsigned i = 0; i != NumVals; ++i) {
unsigned LHSValID = LHSValNoAssignments[i];
if (i != LHSValID ||
- (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
MustMapCurValNos = true;
+ break;
+ }
}
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
if (MustMapCurValNos) {
// Map the first live range.
+
iterator OutIt = begin();
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
- ++OutIt;
- for (iterator I = OutIt, E = end(); I != E; ++I) {
- OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ for (iterator I = next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo != 0 && "Huh?");
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
- // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
- if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
- (OutIt-1)->end = OutIt->end;
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
} else {
- if (I != OutIt) {
+ // Didn't merge. Move OutIt to the next interval.
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
OutIt->start = I->start;
OutIt->end = I->end;
}
-
- // Didn't merge, on to the next one.
- ++OutIt;
}
}
-
// If we merge some live ranges, chop off the end.
+ ++OutIt;
ranges.erase(OutIt, end());
}
@@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
- if (vni->hasRedefByEC())
- OS << "-ec";
}
}
}
@@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Connect to values live out of predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI =
- LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
// FIXME: This could be coincidental. Should we really check for a tied
// operand constraint?
// Note that VNI->def may be a use slot for an early clobber def.
- if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
EqClass.join(VNI->id, UVNI->id);
}
}
@@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
continue;
// DBG_VALUE instructions should have been eliminated earlier.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isUse());
const VNInfo *VNI = LI.getVNInfoAt(Idx);
assert(VNI && "Interval not live at use.");
MO.setReg(LIV[getEqClass(VNI)]->reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index b1e202a273d3..3ade66097cbd 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -15,31 +15,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "liveintervals"
+#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ProcessImplicitDefs.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -52,19 +43,14 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
-STATISTIC(numFolds , "Number of loads/stores folded into instructions");
-STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
-INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
@@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
-
- if (!StrongPHIElim) {
- AU.addPreservedID(PHIEliminationID);
- AU.addRequiredID(PHIEliminationID);
- }
-
- AU.addRequiredID(TwoAddressInstructionPassID);
- AU.addPreserved<ProcessImplicitDefs>();
- AU.addRequired<ProcessImplicitDefs>();
AU.addPreserved<SlotIndexes>();
AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() {
delete I->second;
r2iMap_.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
- while (!CloneMIs.empty()) {
- MachineInstr *MI = CloneMIs.back();
- CloneMIs.pop_back();
- mf_->DeleteMachineInstr(MI);
- }
}
/// runOnMachineFunction - Register allocate the whole function
@@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
lv_ = &getAnalysis<LiveVariables>();
indexes_ = &getAnalysis<SlotIndexes>();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
computeIntervals();
@@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second->print(OS, tri_);
- OS << "\n";
- }
+
+ // Dump the physregs.
+ for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
+
+ // Dump the virtregs.
+ for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI =
+ r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
printInstrs(OS);
}
@@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
-bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
- VirtRegMap &vrm, unsigned reg) {
- // We don't handle fancy stuff crossing basic block boundaries
- if (li.ranges.size() != 1)
- return true;
- const LiveRange &range = li.ranges.front();
- SlotIndex idx = range.start.getBaseIndex();
- SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // Skip deleted instructions
- MachineInstr *firstMI = getInstructionFromIndex(idx);
- while (!firstMI && idx != end) {
- idx = idx.getNextIndex();
- firstMI = getInstructionFromIndex(idx);
- }
- if (!firstMI)
- return false;
-
- // Find last instruction in range
- SlotIndex lastIdx = end.getPrevIndex();
- MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
- while (!lastMI && lastIdx != idx) {
- lastIdx = lastIdx.getPrevIndex();
- lastMI = getInstructionFromIndex(lastIdx);
- }
- if (!lastMI)
- return false;
-
- // Range cannot cross basic block boundaries or terminators
- MachineBasicBlock *MBB = firstMI->getParent();
- if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
- return true;
-
- MachineBasicBlock::const_iterator E = lastMI;
- ++E;
- for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
- const MachineInstr &MI = *I;
-
- // Allow copies to and from li.reg
- if (MI.isCopy())
- if (MI.getOperand(0).getReg() == li.reg ||
- MI.getOperand(1).getReg() == li.reg)
- continue;
-
- // Check for operands using reg
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand& mop = MI.getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned PhysReg = mop.getReg();
- if (PhysReg == 0 || PhysReg == li.reg)
- continue;
- if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
- if (!vrm.hasPhys(PhysReg))
- continue;
- PhysReg = vrm.getPhys(PhysReg);
- }
- if (PhysReg && tri_->regsOverlap(PhysReg, reg))
- return true;
- }
- }
-
- // No conflicts found.
- return false;
-}
-
-bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
- SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (SlotIndex index = I->start.getBaseIndex(),
- end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
- index != end;
- index = index.getNextIndex()) {
- MachineInstr *MI = getInstructionFromIndex(index);
- if (!MI)
- continue; // skip deleted instructions
-
- if (JoinedCopies.count(MI))
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned PhysReg = MO.getReg();
- if (PhysReg == 0 || PhysReg == Reg ||
- TargetRegisterInfo::isVirtualRegister(PhysReg))
- continue;
- if (tri_->regsOverlap(Reg, PhysReg))
- return true;
- }
- }
- }
-
- return false;
-}
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
if (!MO.getSubReg() || MO.isEarlyClobber())
return false;
- SlotIndex RedefIndex = MIIdx.getDefIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot();
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
@@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
- SlotIndex defIndex = MIIdx.getDefIndex();
- // Earlyclobbers move back one, so that they overlap the live range
- // of inputs.
- if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
-
- // Make sure the first definition is not a partial redefinition. Add an
- // <imp-def> of the full register.
- // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
- // created the machine instruction should annotate it with <undef> flags
- // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
- // is the main suspect.
- if (MO.getSubReg()) {
- mi->addRegisterDefined(interval.reg);
- // Mark all defs of interval.reg on this instruction as reading <undef>.
- for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO2 = mi->getOperand(i);
- if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
- MO2.setIsUndef();
- }
- }
+ SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike()) {
- CopyMI = mi;
- }
+ // Make sure the first definition is not a partial redefinition.
+ assert(!MO.readsReg() && "First def cannot also read virtual register "
+ "missing <undef> flag?");
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// FIXME: what about dead vars?
SlotIndex killIdx;
if (vi.Kills[0] != mi)
- killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
else
- killIdx = defIndex.getStoreIndex();
+ killIdx = defIndex.getDeadSlot();
// If the kill happens after the definition, we have an intra-block
// live range.
@@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
SlotIndex Start = getMBBStartIdx(Kill->getParent());
- SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+ SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
- ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+ ValNo = interval.getNextValue(Start, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- SlotIndex RedefIndex = MIIdx.getDefIndex();
- if (MO.isEarlyClobber())
- RedefIndex = MIIdx.getUseIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
VNInfo *OldValNo = OldLR->valno;
- SlotIndex DefIndex = OldValNo->def.getDefIndex();
+ SlotIndex DefIndex = OldValNo->def.getRegSlot();
// Delete the previous value, which should be short and continuous,
// because the 2-addr copy must be in the same MBB as the redef.
@@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
- OldValNo->def = RedefIndex;
- OldValNo->setCopy(0);
-
- // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
- if (PartReDef && mi->isCopyLike())
- OldValNo->setCopy(&*mi);
+ OldValNo->def = RedefIndex;
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
OldValNo));
DEBUG({
@@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live until the end of the block. We've already taken care of the
// rest of the live range.
- SlotIndex defIndex = MIIdx.getDefIndex();
+ SlotIndex defIndex = MIIdx.getRegSlot();
if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
+ defIndex = MIIdx.getRegSlot(true);
- VNInfo *ValNo;
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike())
- CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
@@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << '\n');
}
+static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end();
+ SI != SE; ++SI) {
+ const MachineBasicBlock* succ = *SI;
+ if (succ->isLiveIn(Reg))
+ return true;
+ }
+ return false;
+}
+
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator mi,
SlotIndex MIIdx,
MachineOperand& MO,
- LiveInterval &interval,
- MachineInstr *CopyMI) {
- // A physical register cannot be live across basic block, so its
- // lifetime must end somewhere in its defining basic block.
+ LiveInterval &interval) {
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex.getDefIndex();
- // Earlyclobbers move back one.
- if (MO.isEarlyClobber())
- start = MIIdx.getUseIndex();
+ SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber());
SlotIndex end = start;
// If it is not used after definition, it is considered dead at
@@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// advance below compensates.
if (MO.isDead()) {
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
goto exit;
}
@@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
goto exit;
} else {
int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
if (DefIdx != -1) {
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber());
} else {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that
// defines it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
}
goto exit;
}
@@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
baseIndex = baseIndex.getNextIndex();
}
- // The only case we should have a dead physreg here without a killing or
- // instruction where we know it's dead is if it is live-in to the function
- // and never used. Another possible case is the implicit use of the
- // physical register has been deleted by two-address pass.
- end = start.getStoreIndex();
+ // If we get here the register *should* be live out.
+ assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!");
+ // FIXME: We need saner rules for reserved regs.
+ if (isReserved(interval.reg)) {
+ end = start.getDeadSlot();
+ } else {
+ // Unreserved, unallocable registers like EFLAGS can be live across basic
+ // block boundaries.
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) &&
+ "Unreserved reg not live-out?");
+ end = getMBBEndIdx(MBB);
+ }
exit:
assert(start < end && "did not find end of interval?");
@@ -567,9 +436,7 @@ exit:
VNInfo *ValNo = interval.getVNInfoAt(start);
bool Extend = ValNo != 0;
if (!Extend)
- ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
- if (Extend && MO.isEarlyClobber())
- ValNo->setHasRedefByEC(true);
+ ValNo = interval.getNextValue(start, VNInfoAllocator);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR << '\n');
@@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else {
- MachineInstr *CopyMI = NULL;
- if (MI->isCopyLike())
- CopyMI = MI;
+ else
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(MO.getReg()), CopyMI);
- }
+ getOrCreateInterval(MO.getReg()));
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
- LiveInterval &interval, bool isAlias) {
+ LiveInterval &interval) {
+ assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) &&
+ "Only physical registers can be live in.");
+ assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() ||
+ MBB->isLandingPad()) &&
+ "Allocatable live-ins only valid for entry blocks and landing pads.");
+
DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
@@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
while (mi != E) {
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
SeenDefUse = true;
break;
- } else if (mi->definesRegister(interval.reg, tri_)) {
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
SeenDefUse = true;
break;
}
@@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Live-in register might not be used at all.
if (!SeenDefUse) {
- if (isAlias) {
+ if (isAllocatable(interval.reg) ||
+ !isRegLiveIntoSuccessor(MBB, interval.reg)) {
+ // Allocatable registers are never live through.
+ // Non-allocatable registers that aren't live into any successors also
+ // aren't live through.
DEBUG(dbgs() << " dead");
- end = MIIdx.getStoreIndex();
+ return;
} else {
+ // If we get here the register is non-allocatable and live into some
+ // successor. We'll conservatively assume it's live-through.
DEBUG(dbgs() << " live through");
end = getMBBEndIdx(MBB);
}
@@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex defIdx = getMBBStartIdx(MBB);
assert(getInstructionFromIndex(defIdx) == 0 &&
"PHI def index points at actual instruction.");
- VNInfo *vni =
- interval.getNextValue(defIdx, 0, VNInfoAllocator);
+ VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
+ RegMaskBlocks.resize(mf_->getNumBlockIDs());
+
SmallVector<unsigned, 8> UndefUses;
for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
+ RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
+
if (MBB->empty())
continue;
@@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() {
for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
LE = MBB->livein_end(); LI != LE; ++LI) {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
- // Multiple live-ins can alias the same register.
- for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
- if (!hasInterval(*AS))
- handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
- true);
}
// Skip over empty initial indices.
@@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
if (MI->isDebugValue())
continue;
+ assert(indexes_->getInstructionFromIndex(MIIndex) == MI &&
+ "Lost SlotIndex synchronization");
// Handle defs.
for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
MachineOperand &MO = MI->getOperand(i);
+
+ // Collect register masks.
+ if (MO.isRegMask()) {
+ RegMaskSlots.push_back(MIIndex.getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg() || !MO.getReg())
continue;
@@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() {
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
+
+ // Compute the number of register mask instructions in this block.
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.second = RegMaskSlots.size() - RMB.first;
}
// Create empty intervals for registers defined by implicit_def's (except
@@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
DEBUG(dbgs() << "Shrink: " << *li << '\n');
assert(TargetRegisterInfo::isVirtualRegister(li->reg)
- && "Can't only shrink physical registers");
+ && "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
@@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
MachineInstr *UseMI = I.skipInstruction();) {
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
- SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
- VNInfo *VNI = li->getVNInfoAt(Idx);
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ // Note: This intentionally picks up the wrong VNI in case of an EC redef.
+ // See below.
+ VNInfo *VNI = li->getVNInfoBefore(Idx);
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
@@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
<< *li << '\n');
continue;
}
- if (VNI->def == Idx) {
- // Special case: An early-clobber tied operand reads and writes the
- // register one slot early.
- Idx = Idx.getPrevSlot();
- VNI = li->getVNInfoAt(Idx);
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early. The getVNInfoBefore call above would have
+ // picked up the value defined by UseMI. Adjust the kill slot and value.
+ if (SlotIndex::isSameInstr(VNI->def, Idx)) {
+ Idx = VNI->def;
+ VNI = li->getVNInfoBefore(Idx);
assert(VNI && "Early-clobber tied value not available");
}
WorkList.push_back(std::make_pair(Idx, VNI));
@@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
-
- // A use tied to an early-clobber def ends at the load slot and isn't caught
- // above. Catch it here instead. This probably only ever happens for inline
- // assembly.
- if (VNI->def.isUse())
- if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
- WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
}
// Keep track of the PHIs that are in use.
@@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SlotIndex Idx = WorkList.back().first;
VNInfo *VNI = WorkList.back().second;
WorkList.pop_back();
- const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
SlotIndex BlockStart = getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) {
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
@@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ SlotIndex Stop = getMBBEndIdx(*PI);
// A predecessor is not required to have a live-out value for a PHI.
- if (VNInfo *PVNI = li->getVNInfoAt(Stop))
+ if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
WorkList.push_back(std::make_pair(Stop, PVNI));
}
continue;
@@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// VNI is live-in to MBB.
DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+ NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
- assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ assert(li->getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
WorkList.push_back(std::make_pair(Stop, VNI));
}
}
@@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
assert(LII != NewLI.end() && "Missing live range for PHI");
- if (LII->end != VNI->def.getNextSlot())
+ if (LII->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
@@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Register allocator hooks.
//
-MachineBasicBlock::iterator
-LiveIntervals::getLastSplitPoint(const LiveInterval &li,
- MachineBasicBlock *mbb) const {
- const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
-
- // If li is not live into a landing pad, we can insert spill code before the
- // first terminator.
- if (!lpad || !isLiveInToMBB(li, lpad))
- return mbb->getFirstTerminator();
-
- // When there is a landing pad, spill code must go before the call instruction
- // that can throw.
- MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
- while (I != B) {
- --I;
- if (I->getDesc().isCall())
- return I;
- }
- // The block contains no calls that can throw, so use the first terminator.
- return mbb->getFirstTerminator();
-}
-
void LiveIntervals::addKillFlags() {
for (iterator I = begin(), E = end(); I != E; ++I) {
unsigned Reg = I->first;
@@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() {
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
++RI) {
- // A LOAD index indicates an MBB edge.
- if (RI->end.isLoad())
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
continue;
MachineInstr *MI = getInstructionFromIndex(RI->end);
if (!MI)
@@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
if (Reg == 0 || Reg == li.reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !allocatableRegs_[Reg])
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg))
continue;
- // FIXME: For now, only remat MI with at most one register operand.
- assert(!RegOp &&
- "Can't rematerialize instruction with multiple register operand!");
RegOp = MO.getReg();
-#ifndef NDEBUG
- break;
-#endif
+ break; // Found vreg operand - leave the loop.
}
return RegOp;
}
@@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// isReMaterializable - Returns true if the definition MI of the specified
-/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI) {
- bool Dummy2;
- return isReMaterializable(li, ValNo, MI, 0, Dummy2);
-}
-
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
bool
@@ -1044,1141 +892,653 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// FilterFoldedOps - Filter out two-address use operands. Return
-/// true if it finds any issue with the operands that ought to prevent
-/// folding.
-static bool FilterFoldedOps(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- unsigned &MRInfo,
- SmallVector<unsigned, 2> &FoldOps) {
- MRInfo = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned OpIdx = Ops[i];
- MachineOperand &MO = MI->getOperand(OpIdx);
- // FIXME: fold subreg use.
- if (MO.getSubReg())
- return true;
- if (MO.isDef())
- MRInfo |= (unsigned)VirtRegMap::isMod;
- else {
- // Filter out two-address use operand(s).
- if (MI->isRegTiedToDefOperand(OpIdx)) {
- MRInfo = VirtRegMap::isModRef;
- continue;
- }
- MRInfo |= (unsigned)VirtRegMap::isRef;
- }
- FoldOps.push_back(OpIdx);
- }
- return false;
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return NULL in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
}
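A hedged caller sketch (the surrounding code is hypothetical; only intervalIsInOneMBB and getRegMaskSlotsInBlock are taken from this patch): a non-NULL result means the interval is purely local, so per-block tables can be consulted instead of function-wide ones.

    if (const MachineBasicBlock *MBB = LIS->intervalIsInOneMBB(LI)) {
      // LI never crosses a block boundary, so only the mask slots recorded
      // for this one block can possibly overlap it.
      ArrayRef<SlotIndex> Slots = LIS->getRegMaskSlotsInBlock(MBB->getNumber());
      (void)Slots;
    }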
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
-/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
-/// slot / to reg or any rematerialized load into ith operand of specified
-/// MI. If it is successul, MI is updated with the newly created MI and
-/// returns true.
-bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
- VirtRegMap &vrm, MachineInstr *DefMI,
- SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg) {
- // If it is an implicit def instruction, just delete it.
- if (MI->isImplicitDef()) {
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++numFolds;
- return true;
- }
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // Note that powf() might be unavailable here, so for consistency we use
+ // pow(double, double) instead.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
- return false;
+ return (isDef + isUse) * lc;
+}
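For orientation, a few approximate values of the damping factor computed by the expression above, next to the naive 10^d estimate it tempers:

    loopDepth = 0   -> lc ≈ 1        (10^0   = 1)
    loopDepth = 1   -> lc ≈ 10.1     (10^1   = 10)
    loopDepth = 2   -> lc ≈ 87       (10^2   = 100)
    loopDepth = 10  -> lc ≈ 6.0e7    (10^10  = 1e10)
    loopDepth = 200 -> lc ≈ 6.7e33   (10^200 would overflow a float)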
- // The only time it's safe to fold into a two address instruction is when
- // it's folding reload and spill from / into a spill stack slot.
- if (DefMI && (MRInfo & VirtRegMap::isMod))
- return false;
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
- if (fmi) {
- // Remember this instruction uses the spill slot.
- if (isSS) vrm.addSpillSlotUse(Slot, fmi);
-
- // Attempt to fold the memory reference into the instruction. If
- // we can do this, we don't need to insert spill code.
- if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
- vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
- vrm.transferSpillPts(MI, fmi);
- vrm.transferRestorePts(MI, fmi);
- vrm.transferEmergencySpills(MI, fmi);
- ReplaceMachineInstrInMaps(MI, fmi);
- MI->eraseFromParent();
- MI = fmi;
- ++numFolds;
- return true;
- }
- return false;
+ return LR;
}
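No caller of addLiveRangeToEndOfBlock appears in this hunk; as a hedged sketch, a pass that lowers PHIs could use it roughly as follows (IncomingReg and CopyMI are illustrative assumptions, not names from this patch):

    // CopyMI is a copy just inserted near the bottom of a predecessor block;
    // make IncomingReg live from that copy to the end of the block, where the
    // successor's PHI will read it.
    LiveRange LR = LIS->addLiveRangeToEndOfBlock(IncomingReg, CopyMI);
    (void)LR; // the returned range covers [def slot of CopyMI, block end)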
-/// canFoldMemoryOperand - Returns true if the specified load / store
-/// folding is possible.
-bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMat) const {
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
- // It's only legal to remat for a use, not a def.
- if (ReMat && (MRInfo & VirtRegMap::isMod))
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
return false;
- return tii_->canFoldMemoryOperand(MI, FoldOps);
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(tri_->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
}
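A hedged sketch of a consumer (the allocator loop below is hypothetical; checkRegMaskInterference and BitVector::test are the only pieces taken from the code above): once the query succeeds, candidate physical registers clobbered by any call inside the interval can be rejected cheaply.

    BitVector UsableRegs;
    bool HasRegMask = LIS->checkRegMaskInterference(VirtRegLI, UsableRegs);
    for (unsigned i = 0, e = Order.size(); i != e; ++i) {
      unsigned PhysReg = Order[i];
      // Every mask overlapping VirtRegLI cleared the bits of the registers it
      // clobbers, so a cleared bit means PhysReg cannot hold this interval.
      if (HasRegMask && !UsableRegs.test(PhysReg))
        continue;
      // ...try to assign PhysReg to VirtRegLI...
    }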
-bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+//===----------------------------------------------------------------------===//
+// HMEditor class.
+//===----------------------------------------------------------------------===//
- MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex NewIdx;
+
+ typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
+ typedef DenseSet<IntRangePair> RangeSet;
+
+ struct RegRanges {
+ LiveRange* Use;
+ LiveRange* EC;
+ LiveRange* Dead;
+ LiveRange* Def;
+ RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
+ };
+ typedef DenseMap<unsigned, RegRanges> BundleRanges;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI, SlotIndex NewIdx)
+ : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
+
+ // Update intervals for all operands of MI from OldIdx to NewIdx.
+ // This assumes that MI used to be at OldIdx, and now resides at
+ // NewIdx.
+ void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
+ assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
+
+ // Collect the operands.
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+ // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }
- if (mbb == 0)
- return false;
+ if (hasRegMaskOp)
+ updateRegMaskSlots(OldIdx);
- for (++itr; itr != li.ranges.end(); ++itr) {
- MachineBasicBlock *mbb2 =
- indexes_->getMBBCoveringRange(itr->start, itr->end);
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
+#endif
- if (mbb2 != mbb)
- return false;
}
- return true;
-}
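A hedged sketch of the driver (handleMove itself is defined outside this hunk, so the exact sequence is an assumption): the editor is built once per move with the instruction's new index, then asked to rewrite every range that referred to the old one.

    SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
    indexes_->removeMachineInstrFromMaps(MI);
    SlotIndex NewIndex = indexes_->insertMachineInstrInMaps(MI);
    HMEditor(*this, *mri_, *tri_, NewIndex).moveAllRangesFrom(MI, OldIndex);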
+ // Update intervals for all operands of MI to refer to BundleStart's
+ // SlotIndex.
+ void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
+ if (MI == BundleStart)
+ return; // Bundling instr with itself - nothing to do.
+
+ SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
+ "SlotIndex <-> Instruction mapping broken for MI");
+
+ // Collect all ranges already in the bundle.
+ MachineBasicBlock::instr_iterator BII(BundleStart);
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
+ if (&*BII == MI)
+ continue;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ }
-/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
-/// interval on to-be re-materialized operands of MI) with new register.
-void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
- MachineInstr *MI, unsigned NewVReg,
- VirtRegMap &vrm) {
- // There is an implicit use. That means one of the other operand is
- // being remat'ed and the remat'ed instruction has li.reg as an
- // use operand. Make sure we rewrite that as well.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (!vrm.isReMaterialized(Reg))
- continue;
- MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
- MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
- if (UseMO)
- UseMO->setReg(NewVReg);
- }
-}
+ BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
-/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
-bool LiveIntervals::
-rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
- MachineInstr *MI,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool CanFold = false;
- RestartInstruction:
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned Reg = mop.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (Reg != li.reg)
- continue;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- bool TryFold = !DefIsReMat;
- bool FoldSS = true; // Default behavior unless it's a remat.
- int FoldSlot = Slot;
- if (DefIsReMat) {
- // If this is the rematerializable definition MI itself and
- // all of its uses are rematerialized, simply delete it.
- if (MI == ReMatOrigDefMI && CanDelete) {
- DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
- << *MI << '\n');
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- break;
- }
+ DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
+ DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
+ DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
- // If def for this use can't be rematerialized, then try folding.
- // If def is rematerializable and it's a load, also try folding.
- TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
- if (isLoad) {
- // Try fold loads (from stack slot, constant pool, etc.) into uses.
- FoldSS = isLoadSS;
- FoldSlot = LdSlot;
- }
- }
+ moveAllEnteringFromInto(OldIdx, Entering, BR);
+ moveAllInternalFromInto(OldIdx, Internal, BR);
+ moveAllExitingFromInto(OldIdx, Exiting, BR);
- // Scan all of the operands of this instruction rewriting operands
- // to use NewVReg instead of li.reg as appropriate. We do this for
- // two reasons:
- //
- // 1. If the instr reads the same spilled vreg multiple times, we
- // want to reuse the NewVReg.
- // 2. If the instr is a two-addr instruction, we are required to
- // keep the src/dst regs pinned.
- //
- // Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
- SmallVector<unsigned, 2> Ops;
- tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
-
- // Create a new virtual register for the spill interval.
- // Create the new register now so we can map the fold instruction
- // to the new register so when it is unfolded we get the correct
- // answer.
- bool CreatedNewVReg = false;
- if (NewVReg == 0) {
- NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- CreatedNewVReg = true;
-
- // The new virtual register should get the same allocation hints as the
- // old one.
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
- if (Hint.first || Hint.second)
- mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
- }
- if (!TryFold)
- CanFold = false;
- else {
- // Do not fold load / store here if we are splitting. We'll find an
- // optimal point to insert a load / store later.
- if (!TrySplit) {
- if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, FoldSS, FoldSlot, NewVReg)) {
- // Folding the load/store can completely change the instruction in
- // unpredictable ways, rescan it from the beginning.
-
- if (FoldSS) {
- // We need to give the new vreg the same stack slot as the
- // spilled interval.
- vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
- }
-
- HasUse = false;
- HasDef = false;
- CanFold = false;
- if (isNotInMIMap(MI))
- break;
- goto RestartInstruction;
- }
- } else {
- // We'll try to fold it later if it's profitable.
- CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
- }
- }
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
+#endif
+ }
- mop.setReg(NewVReg);
- if (mop.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
-
- // Reuse NewVReg for other reads.
- bool HasEarlyClobber = false;
- for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
- MachineOperand &mopj = MI->getOperand(Ops[j]);
- mopj.setReg(NewVReg);
- if (mopj.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
- if (mopj.isEarlyClobber())
- HasEarlyClobber = true;
- }
+private:
- if (CreatedNewVReg) {
- if (DefIsReMat) {
- vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
- if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
- // Each valnum may have its own remat id.
- ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
- } else {
- vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
- }
- if (!CanDelete || (HasUse && HasDef)) {
- // If this is a two-addr instruction then its use operands are
- // rematerializable but its def is not. It should be assigned a
- // stack slot.
- vrm.assignVirt2StackSlot(NewVReg, Slot);
- }
- } else {
- vrm.assignVirt2StackSlot(NewVReg, Slot);
+#ifndef NDEBUG
+ class LIValidator {
+ private:
+ DenseSet<const LiveInterval*> Checked, Bogus;
+ public:
+ void operator()(const IntRangePair& P) {
+ const LiveInterval* LI = P.first;
+ if (Checked.count(LI))
+ return;
+ Checked.insert(LI);
+ if (LI->empty())
+ return;
+ SlotIndex LastEnd = LI->begin()->start;
+ for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
+ LRI != LRE; ++LRI) {
+ const LiveRange& LR = *LRI;
+ if (LastEnd > LR.start || LR.start >= LR.end)
+ Bogus.insert(LI);
+ LastEnd = LR.end;
}
- } else if (HasUse && HasDef &&
- vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
- // If this interval hasn't been assigned a stack slot (because earlier
- // def is a deleted remat def), do it now.
- assert(Slot != VirtRegMap::NO_STACK_SLOT);
- vrm.assignVirt2StackSlot(NewVReg, Slot);
}
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI.
- if (DefIsReMat && ImpUse)
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
-
- // Create a new register interval for this spill / remat.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (CreatedNewVReg) {
- NewLIs.push_back(&nI);
- MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
- if (TrySplit)
- vrm.setIsSplitFromReg(NewVReg, li.reg);
+ bool rangesOk() const {
+ return Bogus.empty();
}
+ };
+#endif
- if (HasUse) {
- if (CreatedNewVReg) {
- LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- } else {
- // Extend the split live interval to this def / use.
- SlotIndex End = index.getDefIndex();
- LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
- nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
+ // maps).
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
+ RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
+ hasRegMaskOp = false;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+
+ if (MO.isRegMask()) {
+ hasRegMaskOp = true;
+ continue;
}
- }
- if (HasDef) {
- // An early clobber starts at the use slot, except for an early clobber
- // tied to a use operand (yes, that is a thing).
- LiveRange LR(HasEarlyClobber && !HasUse ?
- index.getUseIndex() : index.getDefIndex(),
- index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- }
- DEBUG({
- dbgs() << "\t\t\t\tAdded new interval: ";
- nI.print(dbgs(), tri_);
- dbgs() << '\n';
- });
- }
- return CanFold;
-}
-bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
- const VNInfo *VNI,
- MachineBasicBlock *MBB,
- SlotIndex Idx) const {
- return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
-}
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
-/// RewriteInfo - Keep track of machine instrs that will be rewritten
-/// during spilling.
-namespace {
- struct RewriteInfo {
- SlotIndex Index;
- MachineInstr *MI;
- RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
- };
+ unsigned Reg = MO.getReg();
- struct RewriteInfoCompare {
- bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
- return LHS.Index < RHS.Index;
- }
- };
-}
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
-void LiveIntervals::
-rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
- LiveInterval::Ranges::const_iterator &I,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- BitVector &SpillMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
- BitVector &RestoreMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool AllCanFold = true;
- unsigned NewVReg = 0;
- SlotIndex start = I->start.getBaseIndex();
- SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // First collect all the def / use in this live range that will be rewritten.
- // Make sure they are sorted according to instruction index.
- std::vector<RewriteInfo> RewriteMIs;
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineInstr *MI = &*ri;
- MachineOperand &O = ri.getOperand();
- ++ri;
- if (MI->isDebugValue()) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
- uint64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr = MI->getOperand(2).getMetadata();
- DebugLoc DL = MI->getDebugLoc();
- int FI = isLoadSS ? LdSlot : (int)Slot;
- if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
- Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
- ReplaceMachineInstrInMaps(MI, NewDV);
- MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MBB->erase(MI), NewDV);
- continue;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true));
+ assert(LR != 0 && "No EC range?");
+ if (LR->end > OldIdx.getDeadSlot())
+ Exiting.insert(std::make_pair(LI, LR));
+ else
+ Internal.insert(std::make_pair(LI, LR));
+ } else if (MO.isDead()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
+ assert(LR != 0 && "No dead-def range?");
+ Internal.insert(std::make_pair(LI, LR));
+ } else {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot());
+ assert(LR && LR->end > OldIdx.getDeadSlot() &&
+ "Non-dead-def should have live range exiting.");
+ Exiting.insert(std::make_pair(LI, LR));
}
}
-
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- continue;
}
- assert(!(O.isImplicit() && O.isUse()) &&
- "Spilling register that's used as implicit use?");
- SlotIndex index = getInstructionIndex(MI);
- if (index < start || index >= end)
- continue;
-
- if (O.isUndef())
- // Must be defined by an implicit def. It should not be spilled. Note,
- // this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- continue;
- RewriteMIs.push_back(RewriteInfo(index, MI));
}
- std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
-
- unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
- // Now rewrite the defs and uses.
- for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
- RewriteInfo &rwi = RewriteMIs[i];
- ++i;
- SlotIndex index = rwi.Index;
- MachineInstr *MI = rwi.MI;
- // If MI def and/or use the same register multiple times, then there
- // are multiple entries.
- while (i != e && RewriteMIs[i].MI == MI) {
- assert(RewriteMIs[i].Index == index);
- ++i;
- }
- MachineBasicBlock *MBB = MI->getParent();
- if (ImpUse && MI != ReMatDefMI) {
- // Re-matting an instruction with virtual register use. Prevent interval
- // from being spilled.
- getInterval(ImpUse).markNotSpillable();
- }
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering,
+ RangeSet& Exiting, SlotIndex MIStartIdx,
+ SlotIndex MIEndIdx) {
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+ assert(!MO.isRegMask() && "Can't have RegMasks in bundles.");
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
- unsigned MBBId = MBB->getNumber();
- unsigned ThisVReg = 0;
- if (TrySplit) {
- DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
- if (NVI != MBBVRegsMap.end()) {
- ThisVReg = NVI->second;
- // One common case:
- // x = use
- // ...
- // ...
- // def = ...
- // = use
- // It's better to start a new interval to avoid artificially
- // extend the new interval.
- if (MI->readsWritesVirtualRegister(li.reg) ==
- std::make_pair(false,true)) {
- MBBVRegsMap.erase(MBB->getNumber());
- ThisVReg = 0;
- }
- }
- }
+ unsigned Reg = MO.getReg();
+
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
- bool IsNew = ThisVReg == 0;
- if (IsNew) {
- // This ends the previous live interval. If all of its def / use
- // can be folded, give it a low spill weight.
- if (NewVReg && TrySplit && AllCanFold) {
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles.");
+ assert(!MO.isDead() && "Dead-defs not allowed in bundles.");
+ LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot());
+ assert(LR != 0 && "Internal ranges not allowed in bundles.");
+ Exiting.insert(std::make_pair(LI, LR));
}
- AllCanFold = true;
}
- NewVReg = ThisVReg;
-
- bool HasDef = false;
- bool HasUse = false;
- bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
- index, end, MI, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
- ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
- if (!HasDef && !HasUse)
- continue;
+ }
- AllCanFold &= CanFold;
+ BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) {
+ BundleRanges BR;
- // Update weight of spill interval.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (!TrySplit) {
- // The spill weight is now infinity as it cannot be spilled again.
- nI.markNotSpillable();
- continue;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Use = LR;
}
- // Keep track of the last def and first use in each MBB.
- if (HasDef) {
- if (MI != ReMatOrigDefMI || !CanDelete) {
- bool HasKill = false;
- if (!HasUse)
- HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
- else {
- // If this is a two-address code, then this index starts a new VNInfo.
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
- if (VNI)
- HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
- }
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (!HasKill) {
- if (SII == SpillIdxes.end()) {
- std::vector<SRInfo> S;
- S.push_back(SRInfo(index, NewVReg, true));
- SpillIdxes.insert(std::make_pair(MBBId, S));
- } else if (SII->second.back().vreg != NewVReg) {
- SII->second.push_back(SRInfo(index, NewVReg, true));
- } else if (index > SII->second.back().index) {
- // If there is an earlier def and this is a two-address
- // instruction, then it's not possible to fold the store (which
- // would also fold the load).
- SRInfo &Info = SII->second.back();
- Info.index = index;
- Info.canFold = !HasUse;
- }
- SpillMBBs.set(MBBId);
- } else if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index) {
- // There is an earlier def that's not killed (must be two-address).
- // The spill is no longer needed.
- SII->second.pop_back();
- if (SII->second.empty()) {
- SpillIdxes.erase(MBBId);
- SpillMBBs.reset(MBBId);
- }
- }
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II) {
+ LiveInterval* LI = II->first;
+ LiveRange* LR = II->second;
+ if (LR->end.isDead()) {
+ BR[LI->reg].Dead = LR;
+ } else {
+ BR[LI->reg].EC = LR;
}
}
- if (HasUse) {
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index)
- // Use(s) following the last def, it's not safe to fold the spill.
- SII->second.back().canFold = false;
- DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
- RestoreIdxes.find(MBBId);
- if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
- // If we are splitting live intervals, only fold if it's the first
- // use and there isn't another use later in the MBB.
- RII->second.back().canFold = false;
- else if (IsNew) {
- // Only need a reload if there isn't an earlier def / use.
- if (RII == RestoreIdxes.end()) {
- std::vector<SRInfo> Infos;
- Infos.push_back(SRInfo(index, NewVReg, true));
- RestoreIdxes.insert(std::make_pair(MBBId, Infos));
- } else {
- RII->second.push_back(SRInfo(index, NewVReg, true));
- }
- RestoreMBBs.set(MBBId);
- }
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Def = LR;
}
- // Update spill weight.
- unsigned loopDepth = loopInfo->getLoopDepth(MBB);
- nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ return BR;
}
- if (NewVReg && TrySplit && AllCanFold) {
- // If all of its def / use can be folded, give it a low spill weight.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
+ MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
+ if (!OldKillMI->killsRegister(reg))
+ return; // Bail out if we don't have kill flags on the old register.
+ MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
+ assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
+ assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill.");
+ OldKillMI->clearRegisterKills(reg, &TRI);
+ NewKillMI->addRegisterKilled(reg, &TRI);
}
-}
-bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return false;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index &&
- Restores[i].vreg == vr &&
- Restores[i].canFold)
- return true;
- return false;
-}
-
-void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index && Restores[i].vreg)
- Restores[i].index = SlotIndex();
-}
+ void updateRegMaskSlots(SlotIndex OldIdx) {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(*RI == OldIdx && "No RegMask at OldIdx.");
+ *RI = NewIdx;
+ assert(*prior(RI) < *RI && *RI < *next(RI) &&
+ "RegSlots out of order. Did you move one call across another?");
+ }
-/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
-/// spilled and create empty intervals for their uses.
-void
-LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- std::vector<LiveInterval*> &NewLIs) {
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineOperand &O = ri.getOperand();
- MachineInstr *MI = &*ri;
- ++ri;
- if (MI->isDebugValue()) {
- // Remove debug info for now.
- O.setReg(0U);
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- continue;
- }
- if (O.isDef()) {
- assert(MI->isImplicitDef() &&
- "Register def was not rewritten?");
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- } else {
- // This must be an use of an implicit_def so it's not part of the live
- // interval. Create a new empty live interval for it.
- // FIXME: Can we simply erase some of the instructions? e.g. Stores?
- unsigned NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- vrm.setIsImplicitlyDefined(NewVReg);
- NewLIs.push_back(&getOrCreateInterval(NewVReg));
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == li.reg) {
- MO.setReg(NewVReg);
- MO.setIsUndef();
- }
- }
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+ SlotIndex LastUse = NewIdx;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
}
+ return LastUse;
}
-}
-
-float
-LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
- // Limit the loop depth ridiculousness.
- if (loopDepth > 200)
- loopDepth = 200;
-
- // The loop depth is used to roughly estimate the number of times the
- // instruction is executed. Something like 10^d is simple, but will quickly
- // overflow a float. This expression behaves like 10^d for small d, but is
- // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
- // headroom before overflow.
- // By the way, powf() might be unavailable here. For consistency,
- // We may take pow(double,double).
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
-
- return (isDef + isUse) * lc;
-}
-static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- NewLIs[i]->weight =
- normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
-}
+ void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough)
+ return;
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ if (LastUse != NewIdx)
+ moveKillFlags(LI->reg, NewIdx, LastUse);
+ LR->end = LastUse.getRegSlot();
+ }
-std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpills(const LiveInterval &li,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
- assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
- DEBUG({
- dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.print(dbgs(), tri_);
- dbgs() << '\n';
- });
-
- // Each bit specify whether a spill is required in the MBB.
- BitVector SpillMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
- BitVector RestoreMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
- DenseMap<unsigned,unsigned> MBBVRegsMap;
- std::vector<LiveInterval*> NewLIs;
- const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
- unsigned NumValNums = li.getNumValNums();
- SmallVector<MachineInstr*, 4> ReMatDefs;
- ReMatDefs.resize(NumValNums, NULL);
- SmallVector<MachineInstr*, 4> ReMatOrigDefs;
- ReMatOrigDefs.resize(NumValNums, NULL);
- SmallVector<int, 4> ReMatIds;
- ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
- BitVector ReMatDelete(NumValNums);
- unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
-
- // Spilling a split live interval. It cannot be split any further. Also,
- // it's also guaranteed to be a single val# / range interval.
- if (vrm.getPreSplitReg(li.reg)) {
- vrm.setIsSplitFromReg(li.reg, 0);
- // Unset the split kill marker on the last use.
- SlotIndex KillIdx = vrm.getKillPoint(li.reg);
- if (KillIdx != SlotIndex()) {
- MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
- assert(KillMI && "Last use disappeared?");
- int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
- assert(KillOp != -1 && "Last use disappeared?");
- KillMI->getOperand(KillOp).setIsKill(false);
- }
- vrm.removeKillPoint(li.reg);
- bool DefIsReMat = vrm.isReMaterialized(li.reg);
- Slot = vrm.getStackSlot(li.reg);
- assert(Slot != VirtRegMap::MAX_STACK_SLOT);
- MachineInstr *ReMatDefMI = DefIsReMat ?
- vrm.getReMaterializedMI(li.reg) : NULL;
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
- bool IsFirstRange = true;
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- // If this is a split live interval with multiple ranges, it means there
- // are two-address instructions that re-defined the value. Only the
- // first def can be rematerialized!
- if (IsFirstRange) {
- // Note ReMatOrigDefMI has already been deleted.
- rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
- } else {
- rewriteInstructionsForSpills(li, false, I, NULL, 0,
- Slot, 0, false, false, false,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ // Extend the LiveRange if NewIdx is past the end.
+ if (NewIdx > LR->end) {
+ // Move kill flags if OldIdx was not originally the end
+ // (otherwise LR->end points to an invalid slot).
+ if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
+ assert(LR->end > OldIdx && "LiveRange does not cover original slot");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
}
- IsFirstRange = false;
+ LR->end = NewIdx.getRegSlot();
}
-
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
}
- bool TrySplit = !intervalIsInOneMBB(li);
- if (TrySplit)
- ++numSplits;
- bool NeedStackSlot = false;
- for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
- i != e; ++i) {
- const VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (VNI->isUnused())
- continue; // Dead val#.
- // Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
- bool dummy;
- if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
- // Remember how to remat the def of this val#.
- ReMatOrigDefs[VN] = ReMatDefMI;
- // Original def may be modified so we have to make a copy here.
- MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
- CloneMIs.push_back(Clone);
- ReMatDefs[VN] = Clone;
-
- bool CanDelete = true;
- if (VNI->hasPHIKill()) {
- // A kill is a phi node, not all of its uses can be rematerialized.
- // It must not be deleted.
- CanDelete = false;
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
- }
- if (CanDelete)
- ReMatDelete.set(VN);
+ void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
+ bool GoingUp = NewIdx < OldIdx;
+
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFrom(OldIdx, *EI);
} else {
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFrom(OldIdx, *EI);
}
}
- // One stack slot per live interval.
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
- if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
- Slot = vrm.assignVirt2StackSlot(li.reg);
-
- // This case only occurs when the prealloc splitter has already assigned
- // a stack slot to this vreg.
- else
- Slot = vrm.getStackSlot(li.reg);
+ void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ LR->end <= OldIdx.getDeadSlot() &&
+ "Range should be internal to OldIdx.");
+ LiveRange Tmp(*LR);
+ Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ Tmp.valno->def = Tmp.start;
+ Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
+ LI->removeRange(*LR);
+ LI->addRange(Tmp);
}
- // Create new intervals and rewrite defs and uses.
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
- MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
- bool DefIsReMat = ReMatDefMI != NULL;
- bool CanDelete = ReMatDelete[I->valno->id];
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
- rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFrom(OldIdx, *II);
}
- // Insert spills / restores if we are splitting.
- if (!TrySplit) {
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
+ void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ "Range should start in OldIdx.");
+ assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
+ SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ LR->start = NewStart;
+ LR->valno->def = NewStart;
}
- SmallPtrSet<LiveInterval*, 4> AddedKill;
- SmallVector<unsigned, 2> Ops;
- if (NeedStackSlot) {
- int Id = SpillMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &spills = SpillIdxes[Id];
- for (unsigned i = 0, e = spills.size(); i != e; ++i) {
- SlotIndex index = spills[i].index;
- unsigned VReg = spills[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- bool FoundUse = false;
- Ops.clear();
- if (spills[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- Ops.push_back(j);
- if (MO.isDef())
- continue;
- if (isReMat ||
- (!FoundUse && !alsoFoldARestore(Id, index, VReg,
- RestoreMBBs, RestoreIdxes))) {
- // MI has two-address uses of the same register. If the use
- // isn't the first and only use in the BB, then we can't fold
- // it. FIXME: Move this to rewriteInstructionsForSpills.
- CanFold = false;
- break;
- }
- FoundUse = true;
- }
- }
- // Fold the store into the def if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
- Folded = true;
- if (FoundUse) {
- // Also folded uses, do not issue a load.
- eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- }
- nI.removeRange(index.getDefIndex(), index.getStoreIndex());
- }
- }
-
- // Otherwise tell the spiller to issue a spill.
- if (!Folded) {
- LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
- bool isKill = LR->end == index.getStoreIndex();
- if (!MI->registerDefIsDead(nI.reg))
- // No need to spill a dead def.
- vrm.addSpillPoint(VReg, isKill, MI);
- if (isKill)
- AddedKill.insert(&nI);
- }
- }
- Id = SpillMBBs.find_next(Id);
- }
+ void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFrom(OldIdx, *EI);
}
- int Id = RestoreMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = restores.size(); i != e; ++i) {
- SlotIndex index = restores[i].index;
- if (index == SlotIndex())
- continue;
- unsigned VReg = restores[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- Ops.clear();
- if (restores[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- if (MO.isDef()) {
- // If this restore were to be folded, it would have been folded
- // already.
- CanFold = false;
- break;
- }
- Ops.push_back(j);
- }
- }
+ void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough) {
+ assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
+ "Def in bundle should be def range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "If bundle has use for this reg it should be LR.");
+ BR[LI->reg].Use = LR;
+ return;
+ }
- // Fold the load into the use if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (!isReMat)
- Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
- else {
- MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
- int LdSlot = 0;
- bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- // If the rematerializable def is a load, also try to fold it.
- if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
- Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, isLoadSS, LdSlot, VReg);
- if (!Folded) {
- unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
- if (ImpUse) {
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI and mark the register
- // interval as unspillable.
- LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.markNotSpillable();
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
- }
- }
- }
- }
- // If folding is not possible / failed, then tell the spiller to issue a
- // load / rematerialization for us.
- if (Folded)
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- else
- vrm.addRestorePoint(VReg, MI);
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ moveKillFlags(LI->reg, OldIdx, LastUse);
+
+ if (LR->start < NewIdx) {
+ // Becoming a new entering range.
+ assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
+ "Bundle shouldn't be re-defining reg mid-range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "Bundle shouldn't have different use range for same reg.");
+ LR->end = LastUse.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ // Becoming a new Dead-def.
+ assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
+ "Live range starting at unexpected slot.");
+ assert(BR[LI->reg].Def == LR && "Reg should have def range.");
+ assert(BR[LI->reg].Dead == 0 &&
+ "Can't have def and dead def of same reg in a bundle.");
+ LR->end = LastUse.getDeadSlot();
+ BR[LI->reg].Dead = BR[LI->reg].Def;
+ BR[LI->reg].Def = 0;
}
- Id = RestoreMBBs.find_next(Id);
}
- // Finalize intervals: add kills, finalize spill weights, and filter out
- // dead intervals.
- std::vector<LiveInterval*> RetNewLIs;
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
- LiveInterval *LI = NewLIs[i];
- if (!LI->empty()) {
- if (!AddedKill.count(LI)) {
- LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
- SlotIndex LastUseIdx = LR->end.getBaseIndex();
- MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
- int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
- assert(UseIdx != -1);
- if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
- LastUse->getOperand(UseIdx).setIsKill();
- vrm.addKillPoint(LI->reg, LastUseIdx);
- }
- }
- RetNewLIs.push_back(LI);
+ void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ if (NewIdx > LR->end) {
+ // Range extended to bundle. Add to bundle uses.
+ // Note: Currently adds kill flags to bundle start.
+ assert(BR[LI->reg].Use == 0 &&
+ "Bundle already has use range for reg.");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
+ LR->end = NewIdx.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ assert(BR[LI->reg].Use != 0 &&
+ "Bundle should already have a use range for reg.");
}
}
- handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
- normalizeSpillWeights(RetNewLIs);
- return RetNewLIs;
-}
-
-/// hasAllocatableSuperReg - Return true if the specified physical register has
-/// any super register that's allocatable.
-bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
- if (allocatableRegs_[*AS] && hasInterval(*AS))
- return true;
- return false;
-}
+ void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
+ BundleRanges& BR) {
+ bool GoingUp = NewIdx < OldIdx;
-/// getRepresentativeReg - Find the largest super register of the specified
-/// physical register.
-unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
- unsigned BestReg = Reg;
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
- unsigned SuperReg = *AS;
- if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
- BestReg = SuperReg;
- break;
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFromInto(OldIdx, *EI, BR);
+ } else {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFromInto(OldIdx, *EI, BR);
}
}
- return BestReg;
-}
-/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
-/// specified interval that conflicts with the specified physical register.
-unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
- unsigned PhysReg) const {
- unsigned NumConflicts = 0;
- const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue())
- continue;
- SlotIndex Index = getInstructionIndex(MI);
- if (pli.liveAt(Index))
- ++NumConflicts;
+ void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ // TODO: Sane rules for moving ranges into bundles.
}
- return NumConflicts;
-}
-/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
-/// around all defs and uses of the specified interval. Return true if it
-/// was able to cut its interval.
-bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
- unsigned PhysReg, VirtRegMap &vrm) {
- unsigned SpillReg = getRepresentativeReg(PhysReg);
-
- DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
- << " represented by " << tri_->getName(SpillReg) << '\n');
-
- for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
- // If there are registers which alias PhysReg, but which are not a
- // sub-register of the chosen representative super register. Assert
- // since we can't handle it yet.
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
- tri_->isSuperRegister(*AS, SpillReg));
-
- bool Cut = false;
- SmallVector<unsigned, 4> PRegs;
- if (hasInterval(SpillReg))
- PRegs.push_back(SpillReg);
- for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
- if (hasInterval(*SR))
- PRegs.push_back(*SR);
-
- DEBUG({
- dbgs() << "Trying to spill:";
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
- dbgs() << ' ' << tri_->getName(PRegs[i]);
- dbgs() << '\n';
- });
-
- SmallPtrSet<MachineInstr*, 8> SeenMIs;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue() || SeenMIs.count(MI))
- continue;
- SeenMIs.insert(MI);
- SlotIndex Index = getInstructionIndex(MI);
- bool LiveReg = false;
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
- unsigned PReg = PRegs[i];
- LiveInterval &pli = getInterval(PReg);
- if (!pli.liveAt(Index))
- continue;
- LiveReg = true;
- SlotIndex StartIdx = Index.getLoadIndex();
- SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, tm_);
- }
- report_fatal_error(Msg.str());
+ void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
+ BundleRanges& BR) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFromInto(OldIdx, *II, BR);
+ }
+
+ void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+
+ assert(LR->start.isRegister() &&
+ "Don't know how to merge exiting ECs into bundles yet.");
+
+ if (LR->end > NewIdx.getDeadSlot()) {
+ // This range is becoming an exiting range on the bundle.
+ // If there was an old dead-def of this reg, delete it.
+ if (BR[LI->reg].Dead != 0) {
+ LI->removeRange(*BR[LI->reg].Dead);
+ BR[LI->reg].Dead = 0;
+ }
+ assert(BR[LI->reg].Def == 0 &&
+ "Can't have two defs for the same variable exiting a bundle.");
+ LR->start = NewIdx.getRegSlot();
+ LR->valno->def = LR->start;
+ BR[LI->reg].Def = LR;
+ } else {
+ // This range is becoming internal to the bundle.
+ assert(LR->end == NewIdx.getRegSlot() &&
+ "Can't bundle def whose kill is before the bundle");
+ if (BR[LI->reg].Dead || BR[LI->reg].Def) {
+ // Already have a def for this. Just delete range.
+ LI->removeRange(*LR);
+ } else {
+ // Make range dead, record.
+ LR->end = NewIdx.getDeadSlot();
+ BR[LI->reg].Dead = LR;
+ assert(BR[LI->reg].Use == LR &&
+ "Range becoming dead should currently be use.");
}
- pli.removeRange(StartIdx, EndIdx);
- LiveReg = true;
+ // In both cases the range is no longer a use on the bundle.
+ BR[LI->reg].Use = 0;
}
- if (!LiveReg)
- continue;
- DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
- vrm.addEmergencySpill(SpillReg, MI);
- Cut = true;
}
- return Cut;
-}
-LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst) {
- LiveInterval& Interval = getOrCreateInterval(reg);
- VNInfo* VN = Interval.getNextValue(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, getVNInfoAllocator());
- VN->setHasPHIKill(true);
- LiveRange LR(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- getMBBEndIdx(startInst->getParent()), VN);
- Interval.addRange(LR);
+ void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
+ BundleRanges& BR) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFromInto(OldIdx, *EI, BR);
+ }
- return LR;
+};
+
+void LiveIntervals::handleMove(MachineInstr* MI) {
+ SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
+ indexes_->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = MI->isInsideBundle() ?
+ indexes_->getInstructionIndex(MI) :
+ indexes_->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI->getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesFrom(MI, OldIndex);
}
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) {
+ SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesInto(MI, BundleStart);
+}
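The handleMove and handleMoveIntoBundle entry points added above are the public face of the HMEditor machinery: the caller re-orders the MachineInstr first and then asks LiveIntervals to re-index it and patch the affected ranges. A minimal sketch of the expected call pattern, assuming an in-block reorder done with MachineBasicBlock::splice (the helper name and its surroundings are illustrative, not part of this patch):

  static void moveAndNotify(LiveIntervals *LIS, MachineInstr *MI,
                            MachineBasicBlock::iterator InsertPt) {
    MachineBasicBlock *MBB = MI->getParent();
    // handleMove asserts the move stays within a single basic block.
    MBB->splice(InsertPt, MBB, MachineBasicBlock::iterator(MI));
    LIS->handleMove(MI); // re-index MI and move its live ranges to the new slot
  }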
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 110fe1e62024..60a68806c55e 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -21,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
using namespace llvm;
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index 5d64d285f39a..dbf5ac122d5d 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -20,8 +20,6 @@
#include "llvm/ADT/IntervalMap.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include <algorithm>
-
namespace llvm {
class MachineLoopRange;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index a7d5af5198e5..d8ab7918ae25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI,
assert(DomTree && "Missing dominator tree");
MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
- assert(Kill && "No MBB at Kill");
+ assert(KillMBB && "No MBB at Kill");
// Is there a def in the same MBB we can extend?
if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
@@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
- VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc);
+ VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
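The getNextValue change in this hunk reflects VNInfo no longer recording a defining MachineInstr; a value is now identified by its defining SlotIndex alone. A short sketch of creating a PHI value at a block entry under the new two-argument form (Indexes, LI and Alloc, a VNInfo::Allocator*, are assumed in scope, mirroring the code above):

  SlotIndex Start, End;
  tie(Start, End) = Indexes->getMBBRange(MBB);   // [start, end) of the block
  VNInfo *VNI = LI->getNextValue(Start, *Alloc); // no defining instruction passed
  VNI->setIsPHIDef(true);                        // value is defined by a PHI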
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b23f85165360..695f53631e1b 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,4 +1,4 @@
-//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
@@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
-LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
- LiveIntervals &LIS,
- VirtRegMap &VRM) {
- MachineRegisterInfo &MRI = VRM.getRegInfo();
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- VRM.grow();
- VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ if (VRM) {
+ VRM->grow();
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
LiveInterval &LI = LIS.getOrCreateInterval(VReg);
newRegs_.push_back(&LI);
return LI;
@@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
- const TargetInstrInfo &tii,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
scannedRemattable_ = true;
- if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ if (!TII.isTriviallyReMaterializable(DefMI, aa))
return false;
remattable_.insert(VNI);
return true;
}
-void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
for (LiveInterval::vni_iterator I = parent_.vni_begin(),
E = parent_.vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
if (!DefMI)
continue;
- checkRematerializable(VNI, DefMI, tii, aa);
+ checkRematerializable(VNI, DefMI, aa);
}
scannedRemattable_ = true;
}
-bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
if (!scannedRemattable_)
- scanRemattable(lis, tii, aa);
+ scanRemattable(aa);
return !remattable_.empty();
}
@@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx,
- LiveIntervals &lis) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
+ SlotIndex UseIdx) {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || MO.isDef())
continue;
// Reserved registers are OK.
- if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ if (MO.isUndef() || !LIS.hasInterval(MO.getReg()))
continue;
- // We cannot depend on virtual registers in uselessRegs_.
- if (uselessRegs_)
- for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
- if ((*uselessRegs_)[ui]->reg == MO.getReg())
- return false;
- LiveInterval &li = lis.getInterval(MO.getReg());
+ LiveInterval &li = LIS.getInterval(MO.getReg());
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
@@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis) {
+ bool cheapAsAMove) {
assert(scannedRemattable_ && "Call anyRematerializable first");
// Use scanRemattable info.
@@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
// No defining instruction provided.
SlotIndex DefIdx;
if (RM.OrigMI)
- DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ DefIdx = LIS.getInstructionIndex(RM.OrigMI);
else {
DefIdx = RM.ParentVNI->def;
- RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
assert(RM.OrigMI && "No defining instruction for remattable value");
}
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
return false;
// Verify that all used registers are available with the same values.
- if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
return false;
return true;
@@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg,
const Remat &RM,
- LiveIntervals &lis,
- const TargetInstrInfo &tii,
const TargetRegisterInfo &tri,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
- tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
rematted_.insert(RM.ParentVNI);
- return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
- .getDefIndex();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getRegSlot();
}
-void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
- SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
+ SmallVectorImpl<MachineInstr*> &Dead) {
MachineInstr *DefMI = 0, *UseMI = 0;
// Check that there is a single def and a single use.
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (MO.isDef()) {
if (DefMI && DefMI != MI)
return false;
- if (!MI->getDesc().canFoldAsLoad())
+ if (!MI->canFoldAsLoad())
return false;
DefMI = MI;
} else if (!MO.isUndef()) {
@@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
}
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals &LIS, VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+ ArrayRef<unsigned> RegsBeingSpilled) {
SetVector<LiveInterval*,
SmallVector<LiveInterval*, 8>,
SmallPtrSet<LiveInterval*, 8> > ToShrink;
- MachineRegisterInfo &MRI = VRM.getRegInfo();
for (;;) {
// Erase all dead defs.
while (!Dead.empty()) {
MachineInstr *MI = Dead.pop_back_val();
assert(MI->allDefsAreDead() && "Def isn't really dead");
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
// Never delete inline asm.
if (MI->isInlineAsm()) {
@@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LI.removeValNo(VNI);
if (LI.empty()) {
ToShrink.remove(&LI);
- eraseVirtReg(Reg, LIS);
+ eraseVirtReg(Reg);
}
}
}
@@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Shrink just one live interval. Then delete new dead defs.
LiveInterval *LI = ToShrink.back();
ToShrink.pop_back();
- if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ if (foldAsLoad(LI, Dead))
continue;
if (delegate_)
delegate_->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
// LI may have been separated, create new intervals.
LI->RenumberValues(LIS);
@@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (NumComp <= 1)
continue;
++NumFracRanges;
- bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
SmallVector<LiveInterval*, 8> Dups(1, LI);
for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ Dups.push_back(&createFrom(LI->reg));
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
if (IsOriginal)
- VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ VRM->setIsSplitFromReg(Dups.back()->reg, 0);
if (delegate_)
delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
@@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
- LiveIntervals &LIS,
const MachineLoopInfo &Loops) {
VirtRegAuxInfo VRAI(MF, LIS, Loops);
- MachineRegisterInfo &MRI = MF.getRegInfo();
for (iterator I = begin(), E = end(); I != E; ++I) {
LiveInterval &LI = **I;
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
deleted file mode 100644
index 9b0a671ea9e5..000000000000
--- a/lib/CodeGen/LiveRangeEdit.h
+++ /dev/null
@@ -1,206 +0,0 @@
-//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRangeEdit class represents changes done to a virtual register when it
-// is spilled or split.
-//
-// The parent register is never changed. Instead, a number of new virtual
-// registers are created and added to the newRegs vector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
-#define LLVM_CODEGEN_LIVERANGEEDIT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/LiveInterval.h"
-
-namespace llvm {
-
-class AliasAnalysis;
-class LiveIntervals;
-class MachineLoopInfo;
-class MachineRegisterInfo;
-class VirtRegMap;
-
-class LiveRangeEdit {
-public:
- /// Callback methods for LiveRangeEdit owners.
- struct Delegate {
- /// Called immediately before erasing a dead machine instruction.
- virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
-
- /// Called when a virtual register is no longer used. Return false to defer
- /// its deletion from LiveIntervals.
- virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
-
- /// Called before shrinking the live range of a virtual register.
- virtual void LRE_WillShrinkVirtReg(unsigned) {}
-
- /// Called after cloning a virtual register.
- /// This is used for new registers representing connected components of Old.
- virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
-
- virtual ~Delegate() {}
- };
-
-private:
- LiveInterval &parent_;
- SmallVectorImpl<LiveInterval*> &newRegs_;
- Delegate *const delegate_;
- const SmallVectorImpl<LiveInterval*> *uselessRegs_;
-
- /// firstNew_ - Index of the first register added to newRegs_.
- const unsigned firstNew_;
-
- /// scannedRemattable_ - true when remattable values have been identified.
- bool scannedRemattable_;
-
- /// remattable_ - Values defined by remattable instructions as identified by
- /// tii.isTriviallyReMaterializable().
- SmallPtrSet<const VNInfo*,4> remattable_;
-
- /// rematted_ - Values that were actually rematted, and so need to have their
- /// live range trimmed or entirely removed.
- SmallPtrSet<const VNInfo*,4> rematted_;
-
- /// scanRemattable - Identify the parent_ values that may rematerialize.
- void scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa);
-
- /// allUsesAvailableAt - Return true if all registers used by OrigMI at
- /// OrigIdx are also available with the same value at UseIdx.
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx, LiveIntervals &lis);
-
- /// foldAsLoad - If LI has a single use and a single def that can be folded as
- /// a load, eliminate the register by folding the def into the use.
- bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
-
-public:
- /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
- /// @param parent The register being spilled or split.
- /// @param newRegs List to receive any new registers created. This needn't be
- /// empty initially, any existing registers are ignored.
- /// @param uselessRegs List of registers that can't be used when
- /// rematerializing values because they are about to be removed.
- LiveRangeEdit(LiveInterval &parent,
- SmallVectorImpl<LiveInterval*> &newRegs,
- Delegate *delegate = 0,
- const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
- : parent_(parent), newRegs_(newRegs),
- delegate_(delegate),
- uselessRegs_(uselessRegs),
- firstNew_(newRegs.size()),
- scannedRemattable_(false) {}
-
- LiveInterval &getParent() const { return parent_; }
- unsigned getReg() const { return parent_.reg; }
-
- /// Iterator for accessing the new registers added by this edit.
- typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
- iterator begin() const { return newRegs_.begin()+firstNew_; }
- iterator end() const { return newRegs_.end(); }
- unsigned size() const { return newRegs_.size()-firstNew_; }
- bool empty() const { return size() == 0; }
- LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
-
- ArrayRef<LiveInterval*> regs() const {
- return makeArrayRef(newRegs_).slice(firstNew_);
- }
-
- /// FIXME: Temporary accessors until we can get rid of
- /// LiveIntervals::AddIntervalsForSpills
- SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
- const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
- return uselessRegs_;
- }
-
- /// createFrom - Create a new virtual register based on OldReg.
- LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
-
- /// create - Create a new register with the same class and original slot as
- /// parent.
- LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
- return createFrom(getReg(), LIS, VRM);
- }
-
- /// anyRematerializable - Return true if any parent values may be
- /// rematerializable.
- /// This function must be called before any rematerialization is attempted.
- bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
- AliasAnalysis*);
-
- /// checkRematerializable - Manually add VNI to the list of rematerializable
- /// values if DefMI may be rematerializable.
- bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
- const TargetInstrInfo&, AliasAnalysis*);
-
- /// Remat - Information needed to rematerialize at a specific location.
- struct Remat {
- VNInfo *ParentVNI; // parent_'s value at the remat location.
- MachineInstr *OrigMI; // Instruction defining ParentVNI.
- explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
- };
-
- /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
- /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
- /// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis);
-
- /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
- /// instruction into MBB before MI. The new instruction is mapped, but
- /// liveness is not updated.
- /// Return the SlotIndex of the new instruction.
- SlotIndex rematerializeAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg,
- const Remat &RM,
- LiveIntervals&,
- const TargetInstrInfo&,
- const TargetRegisterInfo&,
- bool Late = false);
-
- /// markRematerialized - explicitly mark a value as rematerialized after doing
- /// it manually.
- void markRematerialized(const VNInfo *ParentVNI) {
- rematted_.insert(ParentVNI);
- }
-
- /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
- bool didRematerialize(const VNInfo *ParentVNI) const {
- return rematted_.count(ParentVNI);
- }
-
- /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
- /// to erase it from LIS.
- void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
-
- /// eliminateDeadDefs - Try to delete machine instructions that are now dead
- /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
- /// and further dead efs to be eliminated.
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals&, VirtRegMap&,
- const TargetInstrInfo&);
-
- /// calculateRegClassAndHint - Recompute register class and hint for each new
- /// register.
- void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
- const MachineLoopInfo&);
-};
-
-}
-
-#endif
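The header removed here is not dropped from the tree: the deletion pairs with the new #include "llvm/CodeGen/LiveRangeEdit.h" in LiveRangeEdit.cpp above, so the class is promoted to a public CodeGen header, and the LIS/VRM/TII/MRI arguments that used to ride along on every call are now captured by the LiveRangeEdit itself. A hedged sketch of what a call site looks like after the change (only the member functions visible in the hunks above are real; the surrounding variables and the tryRematsFirst helper are assumptions):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/CodeGen/LiveRangeEdit.h"

  void runSpillCleanup(LiveRangeEdit &Edit, AliasAnalysis *AA,
                       MachineFunction &MF, const MachineLoopInfo &Loops,
                       SmallVectorImpl<MachineInstr*> &Dead) {
    if (Edit.anyRematerializable(AA))                   // was (LIS, TII, AA)
      tryRematsFirst();                                 // assumed helper
    Edit.eliminateDeadDefs(Dead, ArrayRef<unsigned>()); // was (Dead, LIS, VRM, TII)
    Edit.calculateRegClassAndHint(MF, Loops);           // was (MF, LIS, Loops)
  }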
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 2ca90f9f05c0..5a0d97d132dd 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -14,7 +14,7 @@
// the instruction, but are never used after the instruction (i.e., they are
// killed).
//
-// This class computes live variables using are sparse implementation based on
+// This class computes live variables using a sparse implementation based on
// the machine code SSA form. This class computes live variable information for
// each virtual and _register allocatable_ physical register in a function. It
// uses the dominance properties of SSA form to efficiently compute live
@@ -33,6 +33,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -41,6 +42,7 @@
using namespace llvm;
char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
"Live Variable Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
@@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
MachineBasicBlock *MBB,
std::vector<MachineBasicBlock*> &WorkList) {
unsigned BBNum = MBB->getNumber();
-
+
// Check to see if this basic block is one of the killing blocks. If so,
// remove it.
for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
@@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
break;
}
-
+
if (MBB == DefBlock) return; // Terminate recursion
if (VRInfo.AliveBlocks.test(BBNum))
@@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
// Mark the variable known alive in this bb
VRInfo.AliveBlocks.set(BBNum);
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
}
@@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
unsigned BBNum = MBB->getNumber();
VarInfo& VRInfo = getVarInfo(reg);
- VRInfo.NumUses++;
// Check to see if this basic block is already a kill block.
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
@@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
@@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
PartDefRegs.insert(DefReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg);
unsigned SubReg = *SubRegs; ++SubRegs)
PartDefRegs.insert(SubReg);
}
@@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (Processed.count(SubReg))
continue;
@@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Processed.insert(*SS);
}
}
- }
- else if (LastDef && !PhysRegUse[Reg] &&
- !LastDef->findRegisterDefOperand(Reg))
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
// Last def defines the super register, add an implicit def of reg.
- LastDef->addOperand(MachineOperand::CreateReg(Reg,
- true/*IsDef*/, true/*IsImp*/));
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
PhysRegUse[SubReg] = MI;
}
@@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// Or whole register is defined, but only partly used.
// AX<dead> = AL<imp-def>
// = AL<kill>
- // AX =
+ // AX =
MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.insert(*SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
@@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// EAX<dead> = op AL<imp-def>
// That is, EAX def is dead but AL def extends past it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!PartUses.count(SubReg))
continue;
@@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
PhysRegUse[SubReg] = LastRefOrPartRef;
- for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
unsigned SSReg = *SSRegs; ++SSRegs)
PhysRegUse[SSReg] = LastRefOrPartRef;
}
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.erase(*SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
@@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
return true;
}
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, 0);
+ }
+}
+
void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallVector<unsigned, 4> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
Live.insert(Reg);
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
Live.insert(*SS);
} else {
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
// If a register isn't itself defined, but all parts that make up of it
// are defined, then consider it also defined.
@@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Live.insert(*SS);
}
}
@@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
@@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
PhysRegDef[SubReg] = MI;
PhysRegUse[SubReg] = NULL;
@@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Clear kill and dead markers. LV will recompute them.
SmallVector<unsigned, 4> UseRegs;
SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
if (!MO.isReg() || MO.getReg() == 0)
continue;
unsigned MOReg = MO.getReg();
@@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
HandlePhysRegUse(MOReg, MI);
}
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
unsigned MOReg = DefRegs[i];
@@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// them. The tail callee need not take the same registers as input
// that it produces as output, and there are dependencies for its input
// registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()
- && !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn()
+ && !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
}
}
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ if (!TRI->isInAllocatableClass(LReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LReg);
+ }
+ }
+
// Loop over PhysRegDef / PhysRegUse, killing any registers that are
// available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i)
- if (PhysRegDef[i] || PhysRegUse[i])
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
HandlePhysRegDef(i, 0, Defs);
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
@@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
// All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
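HandleRegMask above is how LiveVariables now reacts to regmask operands, the call-clobber representation: every physical register that is currently live and not preserved by the mask gets a kill, preferring the largest clobbered super-register so fewer implicit operands are needed. A small sketch of the underlying query, written as an assumed stand-alone helper rather than code from this patch:

  // Does any regmask operand on MI (typically a call) clobber PhysReg?
  static bool maskClobbersReg(const MachineInstr &MI, unsigned PhysReg) {
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI.getOperand(i);
      if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg))
        return true;  // PhysReg is not preserved across MI
    }
    return false;
  }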
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 1318d6212497..238bf52dfed7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -71,19 +71,15 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Local Stack Slot Allocation";
- }
private:
};
} // end anonymous namespace
char LocalStackSlotPass::ID = 0;
-
-FunctionPass *llvm::createLocalStackSlotAllocationPass() {
- return new LocalStackSlotPass();
-}
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 4c5fe4c480a6..6c8a1072697c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
LeakDetector::removeGarbageObject(N);
@@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// lists.
void ilist_traits<MachineInstr>::
transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -140,33 +141,75 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
- iterator I = begin();
- while (I != end() && I->isPHI())
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
++I;
+ assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
return I;
}
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
- while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert(!I->isInsideBundle() &&
+ "First non-phi / non-label instruction is inside a bundle!");
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
- iterator I = end();
- while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
; /*noop */
- while (I != end() && !I->getDesc().isTerminator())
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
++I;
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
- iterator B = begin(), I = end();
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
--I;
- if (I->isDebugValue())
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
continue;
return I;
}
@@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getFunction()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
+
void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
@@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
return;
}
- if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
-
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment) {
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+ Comma = ", ";
+ }
+
OS << '\n';
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
@@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator I = begin(); I != end(); ++I) {
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
if (Indexes->hasIndex(I))
OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
I->print(OS, &getParent()->getTarget());
}
@@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
void MachineBasicBlock::removeLiveIn(unsigned Reg) {
std::vector<unsigned>::iterator I =
std::find(LiveIns.begin(), LiveIns.end(), Reg);
- assert(I != LiveIns.end() && "Not a live in!");
- LiveIns.erase(I);
+ if (I != LiveIns.end())
+ LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
@@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() {
TII->RemoveBranch(*this);
} else {
// The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *succ_begin();
+ // its layout successor, insert a branch. First we have to locate the
+ // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no
+ // fall-through edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
@@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
- MI != ME && MI->isPHI(); ++MI)
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
if (MO.getMBB() == fromMBB)
@@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() {
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
- // is possible. The isPredicable check is needed because this code can be
+ // is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
- // Barrier is predicated and thus no longer an actual control barrier. This
- // is over-conservative though, because if an instruction isn't actually
- // predicated we could still treat it like a barrier.
- return empty() || !back().getDesc().isBarrier() ||
- back().getDesc().isPredicable();
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(&back());
}
// If there is no branch, control always falls through.
@@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
if (LV)
- for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
MachineInstr *MI = I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
continue;
unsigned Reg = OI->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
DEBUG(dbgs() << "Removing terminator kill: " << *MI);
@@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
i != e && i->isPHI(); ++i)
for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
if (i->getOperand(ni+1).getMBB() == this)
@@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addLiveIn(*I);
// Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
- for (iterator I = end(), E = begin(); I != E;) {
- if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
continue;
- LV->getVarInfo(Reg).Kills.push_back(I);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(I);
DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
@@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::iterator E = llvm::next(I);
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+
+ return Insts.erase(I.getInstrIterator());
+}
+
+MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
+ if (I->isBundle()) {
+ instr_iterator MII = llvm::next(I);
+ iterator E = end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII++;
+ Insts.remove(MI);
+ }
+ }
+
+ return Insts.remove(I);
+}
+
+void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
+ MachineBasicBlock *Other,
+ MachineBasicBlock::iterator From) {
+ if (From->isBundle()) {
+ MachineBasicBlock::iterator To = llvm::next(From);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
+ return;
+ }
+
+ Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
@@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
- MachineBasicBlock::iterator I = end();
- while (I != begin()) {
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
DebugLoc
-MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
- MachineBasicBlock::iterator E = end();
- if (MBBI != E) {
- // Skip debug declarations, we don't want a DebugLoc from them.
- MachineBasicBlock::iterator MBBI2 = MBBI;
- while (MBBI2 != E && MBBI2->isDebugValue())
- MBBI2++;
- if (MBBI2 != E)
- DL = MBBI2->getDebugLoc();
- }
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations; we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
return DL;
}
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
if (Weights.empty())
return 0;
- succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
return *getWeightIterator(I);
}
@@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) {
return Weights.begin() + index;
}
+/// getWeightIterator - Return the weight iterator corresponding to the
+/// successor iterator I.
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index b92cda961474..a079d6e59139 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
/// the other block frequencies. We do this to avoid the use of floating point.
///
BlockFrequency MachineBlockFrequencyInfo::
-getBlockFreq(MachineBasicBlock *MBB) const {
+getBlockFreq(const MachineBasicBlock *MBB) const {
return MBFI->getBlockFreq(MBB);
}
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..22d7212007fc
--- /dev/null
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1001 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
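The file header above describes the layout strategy only in prose. As a reading aid, here is a minimal, standalone sketch (not part of this patch) of the greedy chain-growing step on a toy CFG: blocks are plain ints, edges carry weights, and the chain repeatedly appends the hottest successor that still heads its own chain. All names and numbers are invented; the real pass works on MachineBasicBlocks, BlockChains, and MBPI/MBFI data rather than bare ints.

#include <cstdio>
#include <map>
#include <vector>

struct ToyEdge { int Succ; unsigned Weight; };

static ToyEdge mkEdge(int S, unsigned W) {
  ToyEdge E; E.Succ = S; E.Weight = W; return E;
}

int main() {
  // A diamond CFG with a hot left side: 0 -> {1 (90), 2 (10)}, 1 -> 3, 2 -> 3.
  std::map<int, std::vector<ToyEdge> > Succs;
  Succs[0].push_back(mkEdge(1, 90));
  Succs[0].push_back(mkEdge(2, 10));
  Succs[1].push_back(mkEdge(3, 100));
  Succs[2].push_back(mkEdge(3, 100));

  // Every block initially heads its own single-block chain.
  std::map<int, int> ChainHead;
  for (int BB = 0; BB != 4; ++BB) ChainHead[BB] = BB;

  // Grow one chain from the entry block, always taking the hottest successor
  // that still heads an unplaced chain (back edges are ignored in this toy).
  std::vector<int> Chain(1, 0);
  int BB = 0;
  for (;;) {
    int Best = -1;
    unsigned BestWeight = 0;
    for (size_t i = 0, e = Succs[BB].size(); i != e; ++i) {
      const ToyEdge &E = Succs[BB][i];
      if (ChainHead[E.Succ] != E.Succ)
        continue;                      // mid-chain or already merged
      if (Best == -1 || E.Weight > BestWeight) {
        Best = E.Succ;
        BestWeight = E.Weight;
      }
    }
    if (Best == -1)
      break;
    Chain.push_back(Best);
    ChainHead[Best] = 0;               // now owned by the entry chain
    BB = Best;
  }

  for (size_t i = 0, e = Chain.size(); i != e; ++i)
    std::printf("%d%s", Chain[i], i + 1 == e ? "\n" : " -> ");
  return 0;
}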
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the datastructure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities. We also can use a block chain to represent a sequence of
+/// basic blocks which have some external (correctness) requirement for
+/// sequential layout.
+///
+/// Eventually, the block chains will form a directed graph over the function.
+/// We provide an SCC-supporting-iterator in order to quickly build and walk the
+/// SCCs of block chains within a function.
+///
+/// The block chains also have support for calculating and caching probability
+/// information related to the chain itself versus other chains. This is used
+/// for ranking during the final layout of block chains.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() const { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() const { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
+ BI != BE; ++BI) {
+ Blocks.push_back(*BI);
+ assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
+ BlockToChain[*BI] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ void dump() LLVM_ATTRIBUTE_USED {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors within the loop currently being processed.
+ ///
+ /// This count is updated at each loop we process to represent the number of
+ /// in-loop predecessors of this chain.
+ unsigned LoopPredecessors;
+};
+}
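As a reading aid, the following standalone sketch (not from the patch) shows the bookkeeping that BlockChain::merge performs: the incoming chain's blocks are appended and the block-to-chain map is repointed at the surviving chain, after which the old chain's block list is no longer used. ToyChain, mergeChains, and the int block ids are invented for the example.

#include <cassert>
#include <map>
#include <vector>

typedef std::vector<int> ToyChain;

static void mergeChains(ToyChain &Dst, ToyChain &Src,
                        std::map<int, ToyChain *> &BlockToChain) {
  for (size_t i = 0, e = Src.size(); i != e; ++i) {
    Dst.push_back(Src[i]);
    assert(BlockToChain[Src[i]] == &Src && "incoming block not in Src chain");
    BlockToChain[Src[i]] = &Dst;      // redirect the mapping, as merge() does
  }
  Src.clear();                        // Src's block list is no longer valid
}

int main() {
  ToyChain A, B;
  std::map<int, ToyChain *> BlockToChain;
  A.push_back(1); BlockToChain[1] = &A;
  B.push_back(2); BlockToChain[2] = &B;
  B.push_back(3); BlockToChain[3] = &B;
  mergeChains(A, B, BlockToChain);
  assert(A.size() == 3 && BlockToChain[3] == &A);
  return 0;
}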
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+ /// \brief A handle to the loop info.
+ const MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLowering *TLI;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily by merging together high probability BB
+ /// sequences according to the "Algo2" in the paper mentioned at the top of
+ /// the file. To reduce malloc traffic, we allocate them using this slab-like
+ /// allocator, and destroy them after the pass completes.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void buildCFGChains(MachineFunction &F);
+ void AlignLoops(MachineFunction &F);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber()
+ << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+
+/// \brief Helper to print the number of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockNum(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer predecessor.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
+ CBI != CBE; ++CBI) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
+ SE = (*CBI)->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*SuccChain.begin());
+ }
+ }
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+ // improve the MBPI interface to efficiently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
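For reference, the 4/5 hotness threshold used above can be evaluated without floating point by cross-multiplying in 64 bits, which is essentially how BranchProbability comparisons behave. The sketch below is illustrative only; isHotEdge is an invented name, and it mirrors the strict greater-than comparison of isEdgeHot rather than the SuccProb < HotProb rejection in selectBestSuccessor.

#include <cassert>
#include <stdint.h>

static bool isHotEdge(uint32_t EdgeWeight, uint32_t SumWeight) {
  // EdgeWeight / SumWeight > 4 / 5  <=>  EdgeWeight * 5 > SumWeight * 4,
  // with the products computed in 64 bits so they cannot overflow.
  return (uint64_t)EdgeWeight * 5 > (uint64_t)SumWeight * 4;
}

int main() {
  assert(isHotEdge(81, 100));   // 81% is hot
  assert(!isHotEdge(80, 100));  // exactly 80% is not strictly greater
  return 0;
}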
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Once we need to walk the worklist looking for a candidate, cleanup the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB,
+ BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ BB = *llvm::prior(Chain.end());
+ for (;;) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ assert(*llvm::prior(Chain.end()) == BB);
+ MachineBasicBlock *BestSucc = 0;
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+ << " to " << getBlockNum(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *llvm::prior(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to layout at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+ // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ BlockFrequency BestExitEdgeFreq;
+ MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *LoopingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ // We also compute and store the best looping successor for use in layout.
+ MachineBasicBlock *BestLoopSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights. This is only valid
+ // because it is purely a ranking function; we don't care about anything
+ // but the relative values.
+ uint32_t BestLoopSuccWeight = 0;
+ // FIXME: We also manually compute the probabilities to avoid quadratic
+ // behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
+ : "exiting: ")
+ << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
+ continue;
+
+ BestLoopSucc = *SI;
+ BestLoopSuccWeight = SuccWeight;
+ continue;
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!BestLoopSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+
+ // If this was the best exiting block so far, also record the looping block.
+ if (ExitingBB == *I)
+ LoopingBB = BestLoopSucc;
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return L.getHeader();
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return L.getHeader();
+
+ assert(LoopingBB && "All successors of a loop block are exit blocks!");
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
+ return LoopingBB;
+}
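Stripped of the chain and landing-pad checks, the exit selection in findBestLoopTop reduces to keeping the candidate whose exit edge frequency (block frequency times exit probability) is largest. The standalone toy version below illustrates only that core comparison; the per-mille probabilities and block numbers are invented.

#include <cstdio>
#include <vector>

struct ToyExit { int Block; unsigned BlockFreq; unsigned ExitProbPerMille; };

int main() {
  std::vector<ToyExit> Exits;
  ToyExit A = {1, 1000, 100}; Exits.push_back(A); // exit freq 100
  ToyExit B = {2,  400, 500}; Exits.push_back(B); // exit freq 200  <- best
  ToyExit C = {3,  300, 300}; Exits.push_back(C); // exit freq  90

  int BestBlock = -1;
  unsigned BestFreq = 0;
  for (size_t i = 0, e = Exits.size(); i != e; ++i) {
    unsigned ExitFreq = Exits[i].BlockFreq * Exits[i].ExitProbPerMille / 1000;
    if (BestBlock == -1 || ExitFreq > BestFreq) {
      BestBlock = Exits[i].Block;
      BestFreq = ExitFreq;
    }
  }
  std::printf("rotate so block %d exits the loop (freq %u)\n",
              BestBlock, BestFreq);
  return 0;
}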
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
+ MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+
+ MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
+ BlockChain &LoopChain = *BlockToChain[LayoutTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+ for (MachineLoop::block_iterator BI = L.block_begin(),
+ BE = L.block_end();
+ BI != BE; ++BI) {
+ BlockChain &Chain = *BlockToChain[*BI];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
+ BCI != BCE; ++BCI)
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
+ LBI != LBE; ++LBI)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+}
+
+void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *Chain
+ = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
+ ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain &Chain = *BlockToChain[BB];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain)
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ BlockChain &FunctionChain = *BlockToChain[&F.front()];
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
+
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ FunctionBlockSet.insert(FI);
+
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
+ BCI != BCE; ++BCI)
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F.begin();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(*BI) << "\n");
+ if (InsertPos != MachineFunction::iterator(*BI))
+ F.splice(InsertPos, *BI);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (BI == FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+ // FIXME: It would be nice if updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed, so that this boilerplate
+ // could be removed.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+ F.back().updateTerminator();
+}
+
+/// \brief Recursive helper to align a loop and any nested loops.
+static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
+ // Recurse through nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+}
+
+/// \brief Align loop headers to target preferred alignments.
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
+ if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return; // Don't care about loop alignment.
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = F.getTarget().getInstrInfo();
+ TLI = F.getTarget().getTargetLowering();
+ assert(BlockToChain.empty());
+
+ buildCFGChains(F);
+ AlignLoops(F);
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absense of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
+ Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
+ : NumUncondBranches;
+ Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
+ : UncondBranchTakenFreq;
+ for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end();
+ SI != SE; ++SI) {
+ // Skip if this successor is a fallthrough.
+ if (I->isLayoutSuccessor(*SI))
+ continue;
+
+ BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
+
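The statistics pass above boils down to: for every successor edge that is not a fallthrough, add block frequency times edge probability to the taken-branch frequency and bump the branch counter. A self-contained toy version of that accumulation, with invented numbers, looks like this:

#include <cstdio>
#include <stdint.h>

int main() {
  // One block with frequency 1000 and two successors: a 90% fallthrough edge
  // and a 10% taken branch.
  uint64_t BlockFreq = 1000;
  struct { bool IsLayoutSucc; unsigned ProbPercent; } Succs[2] =
      { { true, 90 }, { false, 10 } };

  uint64_t TakenFreq = 0;
  unsigned NumTakenBranches = 0;
  for (int i = 0; i != 2; ++i) {
    if (Succs[i].IsLayoutSucc)
      continue;                        // fallthrough: no branch is taken
    TakenFreq += BlockFreq * Succs[i].ProbPercent / 100;
    ++NumTakenBranches;
  }
  std::printf("taken branches: %u, taken frequency: %llu\n",
              NumTakenBranches, (unsigned long long)TakenFreq);
  return 0;
}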
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c13fa6bc5333..0cc1af07952d 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
char MachineBranchProbabilityInfo::ID = 0;
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
+void MachineBranchProbabilityInfo::anchor() { }
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
+ // First we compute the sum with 64 bits of precision; bounding the number
+ // of weights considered ensures that it cannot overflow. Hopefully no one
+ // actually needs 2^32 successors.
+ assert(MBB->succ_size() < UINT32_MAX);
+ uint64_t Sum = 0;
+ Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
}
+ // If the computed sum fits in 32-bits, we're done.
+ if (Sum <= UINT32_MAX)
+ return Sum;
+
+ // Otherwise, compute the scale necessary to cause the weights to fit, and
+ // re-sum with that scale applied.
+ assert((Sum / UINT32_MAX) < UINT32_MAX);
+ Scale = (Sum / UINT32_MAX) + 1;
+ Sum = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, *I);
+ Sum += Weight / Scale;
+ }
+ assert(Sum <= UINT32_MAX);
return Sum;
}
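The scaling above can be exercised in isolation: sum the weights in 64 bits, derive a divisor that brings the rescaled sum back under 32 bits, and re-sum with that divisor applied to every weight, exactly as callers later apply it per edge. The sketch below is a standalone illustration with an invented function name, not the MBPI implementation itself.

#include <assert.h>
#include <stdint.h>
#include <vector>

static uint32_t sumWithScale(const std::vector<uint32_t> &Weights,
                             uint32_t &Scale) {
  const uint64_t Max32 = 0xFFFFFFFFu;  // plays the role of UINT32_MAX
  uint64_t Sum = 0;
  Scale = 1;
  for (size_t i = 0, e = Weights.size(); i != e; ++i)
    Sum += Weights[i];
  if (Sum <= Max32)
    return (uint32_t)Sum;              // fits as-is, no scaling needed

  Scale = (uint32_t)(Sum / Max32) + 1;
  Sum = 0;
  for (size_t i = 0, e = Weights.size(); i != e; ++i)
    Sum += Weights[i] / Scale;         // same division callers apply per edge
  assert(Sum <= Max32);
  return (uint32_t)Sum;
}

int main() {
  std::vector<uint32_t> W(3, 0xFFFFFFFFu); // three maximal weights overflow 32 bits
  uint32_t Scale = 0;
  uint32_t Sum = sumWithScale(W, Scale);
  assert(Scale > 1 && Sum <= 0xFFFFFFFFu);
  (void)Sum;
  return 0;
}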
uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) const {
+MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
@@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- uint32_t Weight = getEdgeWeight(Src, Dst);
- uint32_t Sum = getSumForBlock(Src);
-
- // FIXME: Implement BranchProbability::compare then change this code to
- // compare this BranchProbability against a static "hot" BranchProbability.
- return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
uint32_t MaxWeight = 0;
MachineBasicBlock *MaxSucc = 0;
-
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
- MaxSucc = Succ;
+ MaxSucc = *I;
}
}
- // FIXME: Use BranchProbability::compare.
- if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
return MaxSucc;
return 0;
@@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
BranchProbability
MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
- uint32_t N = getEdgeWeight(Src, Dst);
- uint32_t D = getSumForBlock(Src);
+ uint32_t Scale = 1;
+ uint32_t D = getSumForBlock(Src, Scale);
+ uint32_t N = getEdgeWeight(Src, Dst) / Scale;
return BranchProbability(N, D);
}
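To make the division of labor explicit: getEdgeProbability divides the raw edge weight by the same Scale that getSumForBlock computed, and the scaled sum becomes the denominator. A toy version of that pairing, with invented names standing in for BranchProbability:

#include <cstdio>
#include <stdint.h>

struct ToyProb { uint32_t N, D; };

static ToyProb edgeProb(uint32_t EdgeWeight, uint32_t ScaledSum,
                        uint32_t Scale) {
  ToyProb P;
  P.N = EdgeWeight / Scale;   // numerator is scaled by the same factor
  P.D = ScaledSum;            // denominator is the already-scaled block sum
  return P;
}

int main() {
  ToyProb P = edgeProb(/*EdgeWeight=*/800, /*ScaledSum=*/500, /*Scale=*/2);
  std::printf("%u / %u\n", P.N, P.D);   // prints 400 / 500, i.e. 80%
  return 0;
}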
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 7eda8c129dc4..a63688e9ec62 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -26,13 +26,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
@@ -49,7 +50,7 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -62,6 +63,8 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
+ AllocatableRegs.clear();
+ ReservedRegs.clear();
}
private:
@@ -75,6 +78,8 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
+ BitVector AllocatableRegs;
+ BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -82,9 +87,12 @@ namespace {
MachineBasicBlock::const_iterator E) const ;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -99,6 +107,7 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
-FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
-
bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
@@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
if (!MO.isReg() || !MO.getReg())
continue;
if (!TRI->regsOverlap(MO.getReg(), Reg))
@@ -173,7 +182,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
SeenDef = true;
}
if (SeenDef)
- // See a def of Reg (or an alias) before encountering any use, it's
+ // See a def of Reg (or an alias) before encountering any use, it's
// trivially dead.
return true;
@@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const{
MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
PhysRefs.insert(Reg);
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (MO.isDef())
+ PhysDefs.push_back(Reg);
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
PhysRefs.insert(*Alias);
}
@@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
- // not in the same basic block as the given instruction.
- MachineBasicBlock *MBB = MI->getParent();
- if (CSMI->getParent() != MBB)
- return false;
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ // Avoid extending the live range of physical registers if they are
+ // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
+ while (I != E && I != EE && I->isDebugValue())
++I;
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
if (I == E)
return true;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (MCID.mayLoad()) {
+ if (MI->mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (MI->getDesc().isAsCheapAsAMove()) {
+ if (MI->isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->getDesc().isCommutable()) {
+ if (!FoundCSE && MI->isCommutable()) {
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI) {
Commuted = true;
@@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
SmallSet<unsigned,8> PhysRefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ SmallVector<unsigned, 2> PhysDefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
- // ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between and the physical register uses
- // were not clobbered.
+ // ... Unless the CS is local or is in the sole predecessor block, it
+ // also defines the physical register, the register is not clobbered in
+ // between, and the physical register uses were not clobbered.
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
@@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
MRI->clearKillFlags(CSEPairs[i].second);
}
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to the
+ // live-in list of MBB.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
MI->eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
@@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
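To restate the cross-block case handled above in isolation: when the common subexpression sits in the sole predecessor and defines physical registers, those registers become live across the block boundary, so they must be added to MBB's live-in list. A toy model of just that bookkeeping (ToyBlock and the function name are invented for the sketch):

  #include <set>
  #include <vector>

  struct ToyBlock {                       // Stand-in for a basic block.
    std::set<unsigned> LiveIns;
    bool isLiveIn(unsigned Reg) const { return LiveIns.count(Reg) != 0; }
    void addLiveIn(unsigned Reg) { LiveIns.insert(Reg); }
  };

  // Every physical register the shared instruction defines in the predecessor
  // must now be live into this block.
  static void recordCrossBlockPhysDefs(ToyBlock &MBB,
                                       std::vector<unsigned> &PhysDefs) {
    while (!PhysDefs.empty()) {
      unsigned LiveIn = PhysDefs.back();
      PhysDefs.pop_back();
      if (!MBB.isLiveIn(LiveIn))
        MBB.addLiveIn(LiveIn);
    }
  }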
diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp
new file mode 100644
index 000000000000..81b49784c052
--- /dev/null
+++ b/lib/CodeGen/MachineCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+using namespace llvm;
+
+void MachineCodeEmitter::anchor() { }
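The new file above exists only to give the class an out-of-line virtual method, the usual "anchor" idiom: defining one virtual member in a single .cpp pins the vtable (and RTTI) to that object file instead of emitting weak copies in every translation unit that includes the header. A minimal sketch of the idiom with made-up names:

  // Widget.h (sketch)
  class Widget {
  public:
    virtual ~Widget() {}
    virtual void run() = 0;
  private:
    virtual void anchor();   // Declared here, defined in exactly one .cpp file.
  };

  // Widget.cpp (sketch)
  void Widget::anchor() {}   // Pins Widget's vtable to this translation unit.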
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..9730eaacf6e4
--- /dev/null
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,340 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ BitVector ReservedRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SmallVector<unsigned, 4> DestList;
+ typedef DenseMap<unsigned, DestList> SourceMap;
+
+ void SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ SourceMap::iterator SI = SrcMap.find(Reg);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ SI = SrcMap.find(*AS);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ }
+}
+
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+ const MachineInstr *MI) {
+ const MachineBasicBlock *MBB = CopyMI->getParent();
+ if (MI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CopyMI;
+ MachineBasicBlock::const_iterator E = MBB->end();
+ MachineBasicBlock::const_iterator E2 = MI;
+
+ ++I;
+ while (I != E && I != E2) {
+ if (I->hasUnmodeledSideEffects() || I->isCall() ||
+ I->isTerminator())
+ return false;
+ ++I;
+ }
+ return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is,
+/// the destination of the copy is the same as the source of the earlier copy
+/// that supplied the source. If the destination is a sub-register, then the
+/// sub-register indices must also match. e.g.
+/// ecx = mov eax
+/// al = mov cl
+/// But not
+/// ecx = mov eax
+/// al = mov ch
+static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (Def == SrcSrc)
+ return true;
+ if (TRI->isSubRegister(SrcSrc, Def)) {
+ unsigned SrcDef = CopyMI->getOperand(0).getReg();
+ unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
+ if (!SubIdx)
+ return false;
+ return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
+ }
+
+ return false;
+}
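To make the doc comment's example concrete: given the earlier copy %ECX = COPY %EAX and the later copy %AL = COPY %CL, the later destination (AL) is the low byte of the earlier source (EAX), and CL is the same low-byte slice of ECX, so the later copy is a nop; with %CH the slices differ and it is not. A self-contained sketch of that index comparison, with a hypothetical sub-register table standing in for TargetRegisterInfo:

  enum Reg { EAX, ECX, AL, CL, CH };
  enum SubIdx { NoSubReg = 0, sub_8bit, sub_8bit_hi };

  // Hypothetical table: which slice of Super does Sub occupy?
  static SubIdx getSubRegIdx(Reg Super, Reg Sub) {
    if (Super == EAX && Sub == AL) return sub_8bit;
    if (Super == ECX && Sub == CL) return sub_8bit;
    if (Super == ECX && Sub == CH) return sub_8bit_hi;
    return NoSubReg;
  }

  // Earlier copy: SrcDef = COPY SrcSrc.  Later copy: Def = COPY Src.
  static bool isNopCopySketch(Reg SrcDef, Reg SrcSrc, Reg Def, Reg Src) {
    if (Def == SrcSrc)
      return true;                          // Copying the value straight back.
    SubIdx A = getSubRegIdx(SrcSrc, Def);   // Slice of the earlier source.
    SubIdx B = getSubRegIdx(SrcDef, Src);   // Slice of the earlier destination.
    return A != NoSubReg && A == B;         // Same slice: value is already there.
  }

  int main() {
    bool Nop = isNopCopySketch(ECX, EAX, AL, CL);     // true
    bool NotNop = !isNopCopySketch(ECX, EAX, AL, CH); // true
    return (Nop && NotNop) ? 0 : 1;
  }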
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ SourceMap SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ if (!ReservedRegs.test(Def) &&
+ (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ isNopCopy(CopyMI, Def, Src, TRI)) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been clobbered in between, so eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // Also avoid eliminating a copy from reserved registers unless the
+ // definition is proven not to be clobbered. e.g.
+ // %RSP<def> = COPY %RAX
+ // CALL
+ // %RAX<def> = COPY %RSP
+
+ // Clear any kills of Def between CopyMI and MI. This extends the
+ // live range.
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
+ I->clearRegisterKills(Def, TRI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+ // If Src is defined by a previous copy, it cannot be eliminated.
+ CI = CopyMap.find(Src);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Def' was previously the source of another copy, then that earlier
+ // copy's source is no longer available for propagation. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ // ... Make sure to clear the def maps of aliases first.
+ for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember the source that is copied to Def. Once the source is
+ // clobbered, it is no longer available for copy propagation.
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+ SrcMap[Src].end()) {
+ SrcMap[Src].push_back(Def);
+ }
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ int RegMaskOpNum = -1;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ RegMaskOpNum = i;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. It is possible to use the register mask to
+ // prune the available copies, but treat it like a basic block boundary for
+ // now.
+ if (RegMaskOpNum >= 0) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ unsigned Reg = (*DI)->getOperand(0).getReg();
+ if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ continue;
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ // Clear all data structures as if we were beginning a new basic block.
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+ continue;
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ CopyMap.erase(Reg);
+ AvailCopyMap.erase(Reg);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+
+ // If 'Reg' is previously source of a copy, it is no longer available for
+ // copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+ // If MBB does have successors, then conservatively assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
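A stripped-down model of the block scan above may help: walk forward remembering which register still holds a copy of which source; a later copy that merely moves the value back, with nothing clobbering it in between, is a nop and can be deleted. The toy below ignores sub-registers, aliases, reserved registers, and regmasks, all of which the real pass has to respect:

  #include <cstdio>
  #include <map>
  #include <vector>

  struct ToyInst {
    bool IsCopy;
    unsigned Def, Src;   // Src is only meaningful when IsCopy is true.
  };

  // Return the indices of copies that cancel out and could be deleted.
  static std::vector<unsigned> propagateCopies(const std::vector<ToyInst> &Block) {
    std::map<unsigned, unsigned> AvailCopy;  // Dest -> source it still holds.
    std::vector<unsigned> Dead;
    for (unsigned i = 0, e = (unsigned)Block.size(); i != e; ++i) {
      const ToyInst &I = Block[i];
      if (I.IsCopy) {
        std::map<unsigned, unsigned>::iterator CI = AvailCopy.find(I.Src);
        if (CI != AvailCopy.end() && CI->second == I.Def) {
          Dead.push_back(i);                 // Def already holds this value.
          continue;
        }
        AvailCopy[I.Def] = I.Src;            // Def now holds a copy of Src.
      } else {
        AvailCopy.erase(I.Def);              // A non-copy def clobbers Def.
      }
      // Any def of I.Def also invalidates copies *from* I.Def.
      for (std::map<unsigned, unsigned>::iterator It = AvailCopy.begin();
           It != AvailCopy.end();) {
        if (It->second == I.Def)
          AvailCopy.erase(It++);
        else
          ++It;
      }
    }
    return Dead;
  }

  int main() {
    // r1 = COPY r0; r0 = COPY r1  -> the second copy cancels out.
    ToyInst B[] = { {true, 1, 0}, {true, 0, 1} };
    std::vector<ToyInst> Block(B, B + 2);
    std::printf("dead copies: %u\n", (unsigned)propagateCopies(Block).size());
    return 0;
  }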
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 20066a067b8f..d8c2f6a2eaef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -13,12 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Config/config.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -28,6 +25,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
@@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *
MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
uint64_t s, unsigned base_alignment,
- const MDNode *TBAAInfo) {
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
}
MachineMemOperand *
@@ -286,7 +285,13 @@ void MachineFunction::dump() const {
}
void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
- OS << "# Machine code for function " << Fn->getName() << ":\n";
+ OS << "# Machine code for function " << Fn->getName() << ": ";
+ if (RegInfo) {
+ OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
+ if (!RegInfo->tracksLiveness())
+ OS << ", not tracking liveness";
+ }
+ OS << '\n';
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -335,7 +340,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ return "CFG for '" + F->getFunction()->getName().str() + "' function";
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -368,7 +373,7 @@ namespace llvm {
void MachineFunction::viewCFG() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr());
+ ViewGraph(this, "mf" + getFunction()->getName());
#else
errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const
void MachineFunction::viewCFGOnly() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+ ViewGraph(this, "mf" + getFunction()->getName(), true);
#else
errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
BV.set(*CSR);
// The entry MBB always has all CSRs pristine.
@@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 0;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
@@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 1;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// createJumpTableIndex - Create a new jump table entry in the jump table info.
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
+void MachineConstantPoolValue::anchor() { }
+
Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
@@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// reject them.
if (A->getType() == B->getType()) return false;
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
// For now, only support constants with the same size.
- if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
+ StoreSize > 128)
return false;
- // If a floating-point value and an integer value have the same encoding,
- // they can share a constant-pool entry.
- if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
- if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
- return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
- if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
- if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
- return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
-
- // Two vectors can share an entry if each pair of corresponding
- // elements could.
- if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
- if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
- if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
- return false;
- for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
- if (!CanShareConstantPoolEntry(AV->getOperand(i),
- BV->getOperand(i), TD))
- return false;
- return true;
- }
-
- // TODO: Handle other cases.
-
- return false;
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both constants to an integer. If we
+ // get two identical ConstantInts, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // TargetData.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(A), TD);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(A), TD);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(B), TD);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(B), TD);
+
+ return A == B;
}
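The net effect of the rewritten check above: both constants are folded to an integer of their common store size and compared, so a float 1.0f and an i32 holding 0x3F800000 can now share one 4-byte constant-pool slot. A quick standalone check of that bit-pattern equivalence (plain C++, not the LLVM constant-folding API):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    float F = 1.0f;
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits)); // Bit-cast the float to an integer.
    // IEEE-754 single-precision 1.0f is 0x3F800000, so a float 1.0f constant
    // and an i32 0x3F800000 constant fold to the same 32-bit value and can
    // share a constant-pool entry under the new check.
    std::printf("0x%08X\n", Bits);        // Prints 0x3F800000.
    return Bits == 0x3F800000u ? 0 : 1;
  }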
/// getConstantPoolIndex - Create a new entry in the constant pool or return
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 054c750c9f2b..35591e1649d3 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -19,9 +19,8 @@ using namespace llvm;
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
- CodeGenOpt::Level OL) :
- FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
+ FunctionPass(ID), TM(tm), MF(0) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a240667f7d6a..e553a0463a2a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsKill = isKill;
IsDead = isDead;
IsUndef = isUndef;
+ IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
SubReg = 0;
@@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return false;
switch (getType()) {
- default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
return getReg() == Other.getReg() && isDef() == Other.isDef() &&
getSubReg() == Other.getSubReg();
@@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
return getBlockAddress() == Other.getBlockAddress();
+ case MO_RegisterMask:
+ return getRegMask() == Other.getRegMask();
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata();
}
+ llvm_unreachable("Invalid machine operand type");
}
/// print - Print the specified machine operand.
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || isUndef()) {
+ if (isKill() || isDead() || isUndef() || isInternalRead()) {
if (NeedComma) OS << ',';
- if (isKill()) OS << "kill";
- if (isDead()) OS << "dead";
+ NeedComma = false;
+ if (isKill()) {
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ OS << "dead";
+ NeedComma = true;
+ }
if (isUndef()) {
- if (isKill() || isDead())
- OS << ',';
+ if (NeedComma) OS << ',';
OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
}
}
OS << '>';
@@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
OS << '>';
break;
+ case MachineOperand::MO_RegisterMask:
+ OS << "<regmask>";
+ break;
case MachineOperand::MO_Metadata:
OS << '<';
WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
@@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_MCSymbol:
OS << "<MCSym=" << *getMCSymbol() << '>';
break;
- default:
- llvm_unreachable("Unrecognized operand type");
}
if (unsigned TF = getTargetFlags())
@@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
uint64_t s, unsigned int a,
- const MDNode *TBAAInfo)
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges)
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- TBAAInfo(TBAAInfo) {
+ TBAAInfo(TBAAInfo), Ranges(Ranges) {
assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
@@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MCID NULL and no operands.
MachineInstr::MachineInstr()
: MCID(0), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0),
+ NumMemRefs(0), MemRefs(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -473,10 +491,10 @@ MachineInstr::MachineInstr()
void MachineInstr::addImplicitDefUseOperands() {
if (MCID->ImplicitDefs)
- for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() {
/// the MCInstrDesc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
/// basic block.
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
- MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- mmo_iterator OldMemRefsEnd = MemRefsEnd;
+ uint16_t OldNumMemRefs = NumMemRefs;
- size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ uint16_t NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
- mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
- std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ NumMemRefs = NewNum;
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle)
+ return false;
+ }
+ ++MII;
+ }
+
+ return Type == AllInBundle;
}
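hasPropertyInBundle above walks the instructions bundled behind this one and combines their MCInstrDesc flag words; the QueryType decides whether a single match is enough (AnyInBundle) or every bundled instruction must match (AllInBundle). The same any/all logic over a plain list of flag words, as a sketch:

  #include <cstddef>
  #include <vector>

  enum QueryKind { AnyInBundleQ, AllInBundleQ };

  static bool hasPropertySketch(const std::vector<unsigned> &Flags,
                                unsigned Mask, QueryKind Kind) {
    for (std::size_t i = 0; i != Flags.size(); ++i) {
      if (Flags[i] & Mask) {
        if (Kind == AnyInBundleQ)
          return true;            // One hit is enough for "any".
      } else if (Kind == AllInBundleQ) {
        return false;             // One miss sinks "all".
      }
    }
    return Kind == AllInBundleQ;  // "all" holds vacuously; "any" found nothing.
  }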
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
@@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
Other->getNumOperands() != getNumOperands())
return false;
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->remove(MI);
+ }
+ }
getParent()->remove(this);
return this;
}
@@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() {
/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->erase(MI);
+ }
+ }
getParent()->erase(this);
}
@@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
+/// isBundled - Return true if this instruction is part of a bundle. This is
+/// true if either it or its following instruction is marked "InsideBundle".
+bool MachineInstr::isBundled() const {
+ if (isInsideBundle())
+ return true;
+ MachineBasicBlock::const_instr_iterator nextMI = this;
+ ++nextMI;
+ return nextMI != Parent->instr_end() && nextMI->isInsideBundle();
+}
+
bool MachineInstr::isStackAligningInlineAsm() const {
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
+/// getBundleSize - Return the number of instructions inside the MI bundle.
+unsigned MachineInstr::getBundleSize() const {
+ assert(isBundle() && "Expecting a bundle");
+
+ MachineBasicBlock::const_instr_iterator I = *this;
+ unsigned Size = 0;
+ while ((++I)->isInsideBundle()) {
+ ++Size;
+ }
+ assert(Size > 1 && "Malformed bundle");
+
+ return Size;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isPredicable())
return;
@@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (MCID->mayStore() || MCID->isCall()) {
+ if (mayStore() || isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- MCID->isTerminator() || hasUnmodeledSideEffects())
+ isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (MCID->mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!MCID->mayStore() &&
- !MCID->mayLoad() &&
- !MCID->isCall() &&
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!MCID->mayLoad())
+ if (!mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
E = memoperands_end(); I != E; ++I) {
if ((*I)->isVolatile()) return false;
if ((*I)->isStore()) return false;
+ if ((*I)->isInvariant()) return true;
if (const Value *V = (*I)->getValue()) {
// A load from a constant PseudoSourceValue is invariant.
@@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
bool MachineInstr::hasUnmodeledSideEffects() const {
- if (getDesc().hasUnmodeledSideEffects())
+ if (hasProperty(MCID::UnmodeledSideEffects))
return true;
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " = ";
// Print the opcode name.
- OS << getDesc().getName();
+ if (TM && TM->getInstrInfo())
+ OS << TM->getInstrInfo()->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
@@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && getDesc().isCall() &&
+ if (MF && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
- for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
unsigned AliasReg = *Alias; ++Alias)
if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
HasAliasLive = true;
@@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
return Found;
}
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = 0;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ MO.setIsKill(false);
+ }
+}
+
bool MachineInstr::addRegisterDead(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
@@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsImp*/));
}
-void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
bool Dead = true;
- for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
- E = UsedRegs.end(); I != E; ++I)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
if (TRI.regsOverlap(*I, Reg)) {
Dead = false;
break;
@@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRe
// If there are no uses, including partial uses, the def is dead.
if (Dead) MO.setIsDead();
}
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
}
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
- unsigned Hash = MI->getOpcode() * 37;
+ // Build up a buffer of hash code components.
+ //
+ // FIXME: This is a total hack. We should have a hash_value overload for
+ // MachineOperand, but currently that doesn't work because there are many
+ // different ideas of "equality" and thus different sets of information that
+ // contribute to the hash code. This one happens to want to take a specific
+ // subset. And it's still not clear that this routine uses the *correct*
+ // subset of information when computing the hash code. The goal is to use the
+ // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
+ // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
+ // be very useful to factor the selection of relevant inputs out of the two
+ // functions and into a common routine, but it's not clear how that can be
+ // done.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- uint64_t Key = (uint64_t)MO.getType() << 32;
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
- Key |= MO.getReg();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- Key |= MO.getImm();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
break;
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_JumpTableIndex:
- Key |= MO.getIndex();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
break;
case MachineOperand::MO_MachineBasicBlock:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
break;
case MachineOperand::MO_GlobalAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
break;
case MachineOperand::MO_BlockAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ HashComponents.push_back(hash_combine(MO.getType(),
+ MO.getBlockAddress()));
break;
case MachineOperand::MO_MCSymbol:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
break;
}
- Key += ~(Key << 32);
- Key ^= (Key >> 22);
- Key += ~(Key << 13);
- Key ^= (Key >> 8);
- Key += (Key << 3);
- Key ^= (Key >> 15);
- Key += ~(Key << 27);
- Key ^= (Key >> 31);
- Hash = (unsigned)Key + Hash * 37;
- }
- return Hash;
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
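The mechanical part of the new hashing is just hash_combine and hash_combine_range from "llvm/ADT/Hashing.h" (included further up in this file); the hard part, as the FIXME says, is feeding in exactly the operand fields that isIdenticalTo compares under IgnoreVRegDefs. A minimal sketch of the pattern on an invented key type (OperandKey and hashKeys are not LLVM names):

  #include "llvm/ADT/Hashing.h"
  #include "llvm/ADT/SmallVector.h"

  struct OperandKey {          // Hypothetical: only the fields that matter
    unsigned Kind;             // for equality should feed the hash.
    unsigned Value;
  };

  static llvm::hash_code hashKeys(const llvm::SmallVectorImpl<OperandKey> &Keys) {
    llvm::SmallVector<size_t, 8> Components;
    for (size_t i = 0, e = Keys.size(); i != e; ++i)
      Components.push_back(llvm::hash_combine(Keys[i].Kind, Keys[i].Value));
    return llvm::hash_combine_range(Components.begin(), Components.end());
  }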
void MachineInstr::emitError(StringRef Msg) const {
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..73489a7160bf
--- /dev/null
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,278 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isInsideBundle()) {
+ MII->setIsInsideBundle(false);
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+
+ SmallVector<unsigned, 8> LocalDefs;
+ SmallSet<unsigned, 8> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 8> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External use is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ FirstMI->setIsInsideBundle();
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 8> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
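What finalizeBundle computes for the header can be modeled separately from the MachineInstr plumbing: everything defined inside the bundle becomes an implicit def on the BUNDLE instruction, and every register read before it is defined inside the bundle becomes an implicit external use. A toy version of that summary (dead/kill flags and sub-registers, which the real code also tracks, are left out):

  #include <cstddef>
  #include <set>
  #include <vector>

  struct ToyMI {
    std::vector<unsigned> Uses;
    std::vector<unsigned> Defs;
  };

  static void summarizeBundle(const std::vector<ToyMI> &Bundle,
                              std::set<unsigned> &LocalDefs,
                              std::set<unsigned> &ExternUses) {
    for (std::size_t i = 0; i != Bundle.size(); ++i) {
      const ToyMI &MI = Bundle[i];
      for (std::size_t u = 0; u != MI.Uses.size(); ++u)
        if (!LocalDefs.count(MI.Uses[u]))
          ExternUses.insert(MI.Uses[u]); // Reads a value produced outside.
      for (std::size_t d = 0; d != MI.Defs.size(); ++d)
        LocalDefs.insert(MI.Defs[d]);    // Visible as a def of the bundle.
    }
  }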
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ // Skip empty blocks before touching *MII so the assert below never
+ // dereferences instr_end().
+ if (MII == MIE)
+ continue;
+
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, llvm::prior(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
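Typical usage, sketched: a target pass first marks every instruction that should sit behind a bundle header via setIsInsideBundle(), then lets finalizeBundles() add the BUNDLE instructions and their def/use summaries for the whole function. markScheduledBundles below is a hypothetical placeholder for that target-specific step, not an LLVM API:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBundle.h"

  // Hypothetical target-specific step (declaration only): calls
  // setIsInsideBundle() on every instruction meant to follow a bundle header.
  void markScheduledBundles(llvm::MachineFunction &MF);

  // Once the InsideBundle flags are in place, one call finalizes everything.
  bool bundleFunction(llvm::MachineFunction &MF) {
    markScheduledBundles(MF);
    return llvm::finalizeBundles(MF);
  }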
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ RegInfo RI = { false, false, false };
+ for(; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index a1f80d5282e0..8c562cc4454a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -45,7 +45,7 @@ using namespace llvm;
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
cl::desc("MachineLICM should avoid speculation"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted,
namespace {
class MachineLICM : public MachineFunctionPass {
- bool PreRegAlloc;
-
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
@@ -69,6 +67,7 @@ namespace {
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
const InstrItineraryData *InstrItins;
+ bool PreRegAlloc;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -81,7 +80,13 @@ namespace {
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
- BitVector AllocatableSet;
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
// Track 'estimated' register pressure.
SmallSet<unsigned, 32> RegSeen;
@@ -122,8 +127,6 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const { return "Machine Instruction LICM"; }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
@@ -165,7 +168,9 @@ namespace {
/// ProcessMI - Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
- void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ void ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates);
@@ -182,12 +187,12 @@ namespace {
/// invariant. I.e., all virtual register operands are defined outside of
/// the loop, physical registers aren't accessed (explicitly or implicitly),
/// and the instruction is hoistable.
- ///
+ ///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasAnyPHIUse - Return true if the specified register is used by any
- /// phi node.
- bool HasAnyPHIUse(unsigned Reg) const;
+ /// HasLoopPHIUse - Return true if the specified instruction is used by any
+ /// phi node in the current loop.
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and a use in the current loop, return true if the target considered
@@ -200,7 +205,7 @@ namespace {
/// CanCauseHighRegPressure - Visit BBs from header to current BB,
/// check if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
/// UpdateBackTraceRegPressure - Traverse the back trace from header to
/// the current block and update their register pressures to reflect the
@@ -215,13 +220,25 @@ namespace {
/// If not then a load from this mbb may not be safe to hoist.
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
- /// HoistRegion - Walk the specified region of the CFG (defined by all
- /// blocks dominated by the specified block, and that are in the current
- /// loop) in depth first order w.r.t the DominatorTree. This allows us to
- /// visit definitions before uses, allowing us to hoist a loop body in one
- /// pass without iteration.
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ /// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
+ /// given dominator tree node if it's a leaf or all of its children are done.
+ /// Walk up the dominator tree to destroy ancestors which are now done.
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+ /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+ /// blocks dominated by the specified header block, and that are in the
+ /// current loop) in depth first order w.r.t the DominatorTree. This allows
+ /// us to visit definitions before uses, allowing us to hoist a loop body in
+ /// one pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
@@ -278,6 +295,7 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
-FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
- return new MachineLICM(PreRegAlloc);
-}
-
/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
/// loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
@@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
- else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
- DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
-
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
@@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
InstrItins = TM->getInstrItineraryData();
- AllocatableSet = TRI->getAllocatableSet(MF);
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
@@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
while (!Worklist.empty()) {
CurLoop = Worklist.pop_back_val();
CurPreheader = 0;
+ ExitBlocks.clear();
// If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
@@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ CurLoop->getExitBlocks(ExitBlocks);
+
if (!PreRegAlloc)
HoistRegionPostRA();
else {
@@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N, true);
+ HoistOutOfLoop(N);
CSEMap.clear();
}
}
@@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// ProcessMI - Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
- unsigned *PhysRegDefs,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates) {
bool RuledOut = false;
@@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
continue;
}
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
"Not expecting virtual register!");
if (!MO.isDef()) {
- if (Reg && PhysRegDefs[Reg])
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
// If it's using a non-loop-invariant register, then it's obviously not
// safe to hoist.
HasNonInvariantUse = true;
@@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
if (MO.isImplicit()) {
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegClobbers.set(*AS);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
Def = Reg;
// If we have already seen another instruction that defines the same
- // register, then this is not safe.
- if (++PhysRegDefs[Reg] > 1)
- // MI defined register is seen defined by another instruction in
- // the loop, it cannot be a LICM candidate.
- RuledOut = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- if (++PhysRegDefs[*AS] > 1)
+ // register, then this is not safe. A second def is indicated by setting a
+ // PhysRegClobbers bit.
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ if (PhysRegClobbers.test(*AS))
+ // The register defined by MI is also defined by another instruction
+ // in the loop, so MI cannot be a LICM candidate.
RuledOut = true;
+ PhysRegDefs.set(*AS);
+ }
}
// Only consider reloads for now and remats which do not have register
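The hunk above replaces the old per-register def counters with a pair of bit vectors: PhysRegDefs records registers defined at least once in the loop, while PhysRegClobbers records registers defined more than once (or clobbered by a regmask). As a minimal sketch of the same "seen once / seen again" idiom, using std::bitset and hypothetical names instead of llvm::BitVector:

    #include <bitset>

    constexpr unsigned kNumRegs = 256;   // assumed register count for the sketch

    void recordDef(unsigned Reg,
                   std::bitset<kNumRegs> &DefinedOnce,
                   std::bitset<kNumRegs> &DefinedAgain) {
      // A second definition promotes Reg from "defined once" (possibly
      // hoistable) to "clobbered" (never hoistable).
      if (DefinedOnce.test(Reg))
        DefinedAgain.set(Reg);
      DefinedOnce.set(Reg);
    }

A candidate later survives only if its def register is absent from the "defined again" set, which mirrors the PhysRegClobbers test performed in HoistRegionPostRA below.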
@@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
unsigned NumRegs = TRI->getNumRegs();
- unsigned *PhysRegDefs = new unsigned[NumRegs];
- std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
SmallVector<CandidateInfo, 32> Candidates;
SmallSet<int, 32> StoredFIs;
@@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() {
for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegDefs.set(*AS);
}
SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
MachineInstr *MI = &*MII;
- ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ TermRegs.set(*AS);
}
}
@@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() {
// instruction in the loop.
// 2. If the candidate is a load from stack slot (always true for now),
// check if the slot is stored anywhere in the loop.
+ // 3. Make sure the candidate's def does not clobber
+ // registers read by the terminator, and that the terminator does not
+ // clobber the candidate's def.
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
if (Candidates[i].FI != INT_MIN &&
StoredFIs.count(Candidates[i].FI))
continue;
- if (PhysRegDefs[Candidates[i].Def] == 1) {
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidates[i].MI;
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- if (PhysRegDefs[MO.getReg()]) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
@@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() {
HoistPostRA(MI, Candidates[i].Def);
}
}
-
- delete[] PhysRegDefs;
}
/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
@@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG({
- dbgs() << "Hoisting " << *MI;
- if (Preheader->getBasicBlock())
- dbgs() << " to MachineBasicBlock "
- << Preheader->getName();
- if (MI->getParent()->getBasicBlock())
- dbgs() << " from MachineBasicBlock "
- << MI->getParent()->getName();
- dbgs() << "\n";
- });
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
- // Add register to livein list to all the BBs in the current loop since a
+ // Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
AddToLiveIns(Def);
@@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
-
+
if (BB != CurLoop->getHeader()) {
// Check loop exiting blocks.
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
@@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
-/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in depth
-/// first order w.r.t the DominatorTree. This allows us to visit definitions
-/// before uses, allowing us to hoist a loop body in one pass without iteration.
-///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
- assert(N != 0 && "Null dominator tree node?");
- MachineBasicBlock *BB = N->getBlock();
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
- // If the header of the loop containing this basic block is a landing pad,
- // then don't try to hoist instructions out of this loop.
- const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad()) return;
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
- // If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ BackTrace.pop_back();
+}
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
return;
- if (IsHeader) {
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offsprings are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
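The open-children bookkeeping above lets the walk pop a scope as soon as the subtree rooted at a node is fully processed, then keep popping ancestors whose last open child just finished. A stripped-down sketch of the same idea, with a hypothetical TreeNode and CloseScope callback standing in for MachineDomTreeNode and ExitScope:

    #include <functional>
    #include <map>

    struct TreeNode { TreeNode *Parent = nullptr; };

    void closeFinishedScopes(TreeNode *Node,
                             std::map<TreeNode*, unsigned> &OpenChildren,
                             const std::function<void(TreeNode*)> &CloseScope) {
      if (OpenChildren[Node])
        return;                        // Node still has unfinished children.
      CloseScope(Node);
      // Each finished node decrements its parent's open-child count; when
      // that count reaches zero the parent is finished as well.
      while (TreeNode *Parent = Node->Parent) {
        if (--OpenChildren[Parent] != 0)
          break;
        CloseScope(Parent);
        Node = Parent;
      }
    }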
+
+/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+/// blocks dominated by the specified header block, and that are in the
+/// current loop) in depth first order w.r.t the DominatorTree. This allows
+/// us to visit definitions before uses, allowing us to hoist a loop body in
+/// one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ do {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ if (Scopes.size() != 0) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
// Compute registers which are livein into the loop headers.
RegSeen.clear();
BackTrace.clear();
InitRegPressure(Preheader);
}
- // Remember livein register pressure.
- BackTrace.push_back(RegPressure);
+ // Now perform LICM.
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
- SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
- MII = NextMII;
- }
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ continue;
- // Don't hoist things out of a large switch statement. This often causes
- // code to be hoisted that wasn't going to be executed, and increases
- // register pressure in a situation where it's likely to matter.
- if (BB->succ_size() < 25) {
- const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
- for (unsigned I = 0, E = Children.size(); I != E; ++I)
- HoistRegion(Children[I]);
- }
+ EnterScope(MBB);
- BackTrace.pop_back();
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
}
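HoistOutOfLoop replaces the earlier recursion with an explicit worklist; pushing the children in reverse means the next node popped is always the first child, so visitation order matches the recursive dominator-tree walk exactly. A self-contained sketch of that traversal shape, with a hypothetical Node type:

    #include <vector>

    struct Node { std::vector<Node*> Kids; };

    template <typename Visit>
    void preorder(Node *Root, Visit VisitNode) {
      std::vector<Node*> WorkList(1, Root);
      while (!WorkList.empty()) {
        Node *N = WorkList.back();
        WorkList.pop_back();
        VisitNode(N);
        // Reverse push: Kids[0] ends up on top of the stack, so it is the
        // next node visited, just as in the recursive formulation.
        for (auto I = N->Kids.rbegin(), E = N->Kids.rend(); I != E; ++I)
          WorkList.push_back(*I);
      }
    }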
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
@@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
unsigned &RCId, unsigned &RCCost) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
EVT VT = *RC->vt_begin();
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
RCId = RC->getID();
RCCost = 1;
} else {
@@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
RCCost = TLI->getRepRegClassCostFor(VT);
}
}
-
+
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
}
}
+/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
+/// loads from global offset table or constant pool.
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end(); I != E; ++I) {
+ if (const Value *V = (*I)->getValue()) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
+ }
+ }
+ return false;
+}
+
/// IsLICMCandidate - Returns true if the instruction may be a suitable
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
@@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// If it is load then check if it is guaranteed to execute by making sure that
// it dominates all exiting blocks. If it doesn't, then there is a path out of
- // the loop which does not execute this load, so we can't hoist it.
+ // the loop which does not execute this load, so we can't hoist it. Loads
+ // from constant memory are not safe to speculate all the time, for example
+ // an indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
return false;
return true;
@@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
/// invariant. I.e., all virtual register operands are defined outside of the
/// loop, physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
-///
+///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!IsLICMCandidate(I))
return false;
@@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
- if (AllocatableSet.test(Reg))
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
- if (AllocatableSet.test(AliasReg))
- return false;
- }
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
@@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
-/// HasAnyPHIUse - Return true if the specified register is used by any
-/// phi node.
-bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI->isPHI())
- return true;
- // Look pass copies as well.
- if (UseMI->isCopy()) {
- unsigned Def = UseMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Def) &&
- HasAnyPHIUse(Def))
- return true;
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
+ do {
+ MI = Work.pop_back_val();
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ // A PHI may cause a copy to be inserted.
+ if (UseMI->isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI->getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI->isCopy() && CurLoop->contains(UseMI))
+ Work.push_back(UseMI);
+ }
}
- }
+ } while (!Work.empty());
return false;
}
@@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
return true;
if (!InstrItins || InstrItins->isEmpty())
return false;
@@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
-bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+ bool CheapInstr) {
for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
CI != CE; ++CI) {
- if (CI->second <= 0)
+ if (CI->second <= 0)
continue;
unsigned RCId = CI->first;
+ unsigned Limit = RegLimit[RCId];
+ int Cost = CI->second;
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr)
+ return true;
+
for (unsigned i = BackTrace.size(); i != 0; --i) {
SmallVector<unsigned, 8> &RP = BackTrace[i-1];
- if (RP[RCId] + CI->second >= RegLimit[RCId])
+ if (RP[RCId] + Cost >= Limit)
return true;
}
}
@@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
- // If the instruction is cheap, only hoist if it is re-materilizable. LICM
- // will increase register pressure. It's probably not worth it if the
- // instruction is cheap.
- // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
- // these tend to help performance in low register pressure situation. The
- // trade off is it may cause spill in high pressure situation. It will end up
- // adding a store in the loop preheader. But the reload is no more expensive.
- // The side benefit is these loads are frequently CSE'ed.
- if (IsCheapInstruction(MI)) {
- if (!TII->isTriviallyReMaterializable(&MI, AA))
- return false;
- } else {
- // Estimate register pressure to determine whether to LICM the instruction.
- // In low register pressure situation, we can be more aggressive about
- // hoisting. Also, favors hoisting long latency instructions even in
- // moderately high pressure situation.
- // FIXME: If there are long latency loop-invariant instructions inside the
- // loop at this point, why didn't the optimizer's LICM hoist them?
- DenseMap<unsigned, int> Cost;
- for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- ++NumHighLatency;
- return true;
- }
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(&MI, AA))
+ return true;
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favors hoisting long latency instructions even in
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second += RCCost;
- else
- Cost.insert(std::make_pair(RCId, RCCost));
- } else if (isOperandKill(MO, MRI)) {
- // Is a virtual register use is a kill, hoisting it out of the loop
- // may actually reduce register pressure or be register pressure
- // neutral.
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second -= RCCost;
- else
- Cost.insert(std::make_pair(RCId, -RCCost));
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
}
+ Cost[RCId] += RCCost;
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ Cost[RCId] -= RCCost;
}
+ }
- // Visit BBs from header to current BB, if hoisting this doesn't cause
- // high register pressure, then it's safe to proceed.
- if (!CanCauseHighRegPressure(Cost)) {
- ++NumLowRP;
- return true;
- }
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
- // Do not "speculate" in high register pressure situation. If an
- // instruction is not guaranteed to be executed in the loop, it's best to be
- // conservative.
- if (AvoidSpeculation &&
- (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
- return false;
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
- // High register pressure situation, only hoist if the instruction is going to
- // be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) &&
- !MI.isInvariantLoad(AA))
- return false;
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
}
- // If result(s) of this instruction is used by PHIs outside of the loop, then
- // don't hoist it if the instruction because it will introduce an extra copy.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- if (HasAnyPHIUse(MO.getReg()))
- return false;
+ // High register pressure situation, only hoist if the instruction is going
+ // to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
}
return true;
@@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
- if (MI->getDesc().canFoldAsLoad())
+ if (MI->canFoldAsLoad())
return 0;
// If not, we may be able to unfold a load and hoist that.
@@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MI, NewMIs[0]);
- MBB->insert(MI, NewMIs[1]);
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
@@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
// Replace virtual registers defined by MI by their counterparts defined
// by Dup.
+ SmallVector<unsigned, 2> Defs;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- MRI->clearKillFlags(Dup->getOperand(i).getReg());
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Restore old RCs if more than one defs.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
}
}
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
MI->eraseFromParent();
++NumCSEed;
return true;
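The reworked EliminateCSE above performs its register-class updates in two passes: it first tries to constrain every duplicated def, rolling back any partial changes if one constraint fails, and only then performs the irreversible replaceRegWith calls. A minimal sketch of that try-then-rollback shape on plain integers (the "value must be nonnegative" constraint is just a stand-in):

    #include <cstddef>
    #include <vector>

    // Clamp every value to Cap; if any value is negative the whole operation
    // fails and the vector is restored to its original state.
    bool constrainAllOrNothing(std::vector<int> &Values, int Cap) {
      std::vector<int> Orig;
      for (std::size_t i = 0, e = Values.size(); i != e; ++i) {
        Orig.push_back(Values[i]);
        if (Values[i] < 0) {                 // constraint cannot be met
          for (std::size_t j = 0; j != i; ++j)
            Values[j] = Orig[j];             // roll back tentative updates
          return false;
        }
        Values[i] = Values[i] > Cap ? Cap : Values[i];  // tentative update
      }
      return true;                           // all constrained; safe to commit
    }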
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 80c4854238af..ea98b23c6d57 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
: ImmutablePass(ID), Context(MAI, MRI, MOFI),
ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
CallsUnwindInit(0), DbgInfoAvailable(false),
- CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ UsesVAFloatArgument(false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
@@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
MachineModuleInfo::MachineModuleInfo()
: ImmutablePass(ID),
Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
- assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
- "should always be explicitly constructed by LLVMTargetMachine");
- abort();
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
}
MachineModuleInfo::~MachineModuleInfo() {
@@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
/// indexes.
void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
- for (unsigned I = 0, E = Sites.size(); I != E; ++I)
- LPadToCallSiteMap[Sym].push_back(Sites[I]);
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
}
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
@@ -541,8 +540,7 @@ try_next:;
// Add the new filter.
int FilterID = -(1 + FilterIds.size());
FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
- FilterIds.push_back(TyIds[I]);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
FilterEnds.push_back(FilterIds.size());
FilterIds.push_back(0); // terminator
return FilterID;
@@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
const Function* Personality = NULL;
// Scan landing pads. If there is at least one non-NULL personality - use it.
- for (unsigned i = 0; i != LandingPads.size(); ++i)
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
if (LandingPads[i].Personality) {
Personality = LandingPads[i].Personality;
break;
}
- for (unsigned i = 0; i < Personalities.size(); ++i) {
+ for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
if (Personalities[i] == Personality)
return i;
}
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
index 9f4ef1287803..58e067bcb9b2 100644
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -16,6 +16,7 @@
using namespace llvm;
+void MachinePassRegistryListener::anchor() { }
/// Add - Adds a function pass to the registration list.
///
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 266ebf64a3fc..7ea151713a6d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -18,11 +18,12 @@
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
- : TRI(&TRI), IsSSA(true) {
+ : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedPhysRegs.resize(TRI.getNumRegs());
-
+ UsedPhysRegMask.resize(TRI.getNumRegs());
+
// Create the physreg use/def lists.
PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
@@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
- "Vreg use list non-empty still?");
+ clearVirtRegs();
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
@@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
// Accumulate constraints from all uses.
for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
++I) {
- // TRI doesn't have accurate enough information to model this yet.
- if (I.getOperand().getSubReg())
- return false;
const TargetRegisterClass *OpRC =
I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
- if (OpRC)
+ if (unsigned SubIdx = I.getOperand().getSubReg()) {
+ if (OpRC)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx);
+ else
+ NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx);
+ } else if (OpRC)
NewRC = TRI->getCommonSubClass(NewRC, OpRC);
if (!NewRC || NewRC == OldRC)
return false;
@@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
return Reg;
}
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+#endif
+ VRegInfo.clear();
+}
+
/// HandleVRegListReallocation - We just added a virtual register to the
/// VRegInfo info list and it reallocated. Update the use/def lists info
/// pointers.
@@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
- if (!def_empty(Reg))
- return &*def_begin(Reg);
- return 0;
+ def_iterator I = def_begin(Reg);
+ return !I.atEnd() ? &*I : 0;
}
bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
@@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
- for (int i = UsedPhysRegs.find_first(); i >= 0;
- i = UsedPhysRegs.find_next(i))
- for (const unsigned *SS = TRI.getSubRegisters(i);
- unsigned SubReg = *SS; ++SS)
- if (SubReg > unsigned(i))
- UsedPhysRegs.set(SubReg);
-}
-
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
I.getOperand().getParent()->dump();
}
#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = TRI->getReservedRegs(MF);
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified.
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (!def_empty(*R))
+ return false;
+
+ // Check if any overlapping register is allocatable so it may be used later.
+ if (AllocatableRegs.empty())
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (AllocatableRegs.test(*R))
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 84d6df25397c..070a55704dc5 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
if (BB->empty())
return 0;
- MachineBasicBlock::iterator I = BB->front();
+ MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
return 0;
@@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
return DupPHI;
// Otherwise, we do need a PHI: insert one now.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
Loc, VRC, MRI, TII);
@@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
}
llvm_unreachable("MachineOperand::getParent() failure?");
- return 0;
}
/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
@@ -311,7 +310,7 @@ public:
/// Add it into the specified block and return the register.
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
Updater->VRC, Updater->MRI,
Updater->TII);
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..1d3241b8cc6b
--- /dev/null
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,614 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+#include <queue>
+
+using namespace llvm;
+
+static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineScheduler();
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory() {}
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ static char ID; // Class identification, replacement for typeinfo
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineFunctionPass(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
+ }
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingCount;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingCount << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingCount == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
+ }
+ Scheduler->finalizeSchedule();
+ DEBUG(LIS->print(dbgs()));
+ return true;
+}
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ScheduleDAGMI;
+
+/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
+/// scheduling algorithm.
+///
+/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
+/// in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+ virtual ~MachineSchedStrategy() {}
+
+ /// Initialize the strategy after building the DAG for a new region.
+ virtual void initialize(ScheduleDAGMI *DAG) = 0;
+
+ /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+ /// schedule the node at the top of the unscheduled region. Otherwise it will
+ /// be scheduled at the bottom.
+ virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+ /// When all predecessor dependencies have been resolved, free this node for
+ /// top-down scheduling.
+ virtual void releaseTopNode(SUnit *SU) = 0;
+ /// When all successor dependencies have been resolved, free this node for
+ /// bottom-up scheduling.
+ virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
+/// machine instructions while updating LiveIntervals.
+class ScheduleDAGMI : public ScheduleDAGInstrs {
+ AliasAnalysis *AA;
+ MachineSchedStrategy *SchedImpl;
+
+ /// The top of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentTop;
+
+ /// The bottom of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentBottom;
+
+ /// The number of instructions scheduled so far. Used to cut off the
+ /// scheduler at the point determined by misched-cutoff.
+ unsigned NumInstrsScheduled;
+public:
+ ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+ AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
+ NumInstrsScheduled(0) {}
+
+ ~ScheduleDAGMI() {
+ delete SchedImpl;
+ }
+
+ MachineBasicBlock::iterator top() const { return CurrentTop; }
+ MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
+
+ /// Implement ScheduleDAGInstrs interface.
+ void schedule();
+
+protected:
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+ bool checkSchedLimit();
+
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSuccessors(SUnit *SU);
+ void releasePred(SUnit *SU, SDep *PredEdge);
+ void releasePredecessors(SUnit *SU);
+};
+} // namespace
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Fix RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ RegionBegin = llvm::next(RegionBegin);
+ BB->splice(InsertPos, BB, MI);
+ LIS->handleMove(MI);
+ // Fix RegionBegin if another instruction moves above the first instruction.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region.
+void ScheduleDAGMI::schedule() {
+ buildSchedGraph(AA);
+
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ SchedImpl->initialize(this);
+
+ // Release edges from the special Entry node or to the special Exit node.
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ // Release all DAG roots for scheduling.
+ for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
+ I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ SchedImpl->releaseBottomNode(&(*I));
+ }
+
+ CurrentTop = RegionBegin;
+ CurrentBottom = RegionEnd;
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction:\n"; SU->dump(this));
+ if (!checkSchedLimit())
+ break;
+
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ ++CurrentTop;
+ else
+ moveInstruction(MI, CurrentTop);
+ // Release dependent instructions for scheduling.
+ releaseSuccessors(SU);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ if (&*llvm::prior(CurrentBottom) == MI)
+ --CurrentBottom;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = llvm::next(CurrentTop);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Release dependent instructions for scheduling.
+ releasePredecessors(SU);
+ }
+ SU->isScheduled = true;
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
+
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
+
+ unsigned NumTopReady;
+ unsigned NumBottomReady;
+
+public:
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ }
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom())
+ return NULL;
+
+ // As an initial placeholder heuristic, schedule in the direction that has
+ // the fewest choices.
+ SUnit *SU;
+ if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
+ SU = DAG->getSUnit(DAG->top());
+ IsTopNode = true;
+ }
+ else {
+ SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
+ IsTopNode = false;
+ }
+ if (SU->isTopReady()) {
+ assert(NumTopReady > 0 && "bad ready count");
+ --NumTopReady;
+ }
+ if (SU->isBottomReady()) {
+ assert(NumBottomReady > 0 && "bad ready count");
+ --NumBottomReady;
+ }
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ ++NumTopReady;
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ ++NumBottomReady;
+ }
+};
+} // namespace
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new ConvergingScheduler());
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+  // gives nodes with a higher number higher priority, causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
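The SUnitOrder comparators above lean on std::priority_queue's max-heap behaviour:
with a less-than comparator the queue pops the highest NodeNum (the latest
instruction) first, and with a greater-than comparator it pops the lowest. A
minimal standalone sketch, not part of the imported patch, with plain unsigned
ints standing in for SUnits and a hypothetical NodeOrder mirroring SUnitOrder:

// Sketch only: the queue ordering used by InstructionShuffler.
#include <cstdio>
#include <queue>
#include <vector>

template <bool IsReverse>
struct NodeOrder {  // hypothetical stand-in for SUnitOrder
  bool operator()(unsigned A, unsigned B) const {
    return IsReverse ? A > B : A < B;
  }
};

int main() {
  // Less-than comparator: std::priority_queue is a max-heap, so top() is the
  // highest node number, i.e. the instruction that originally came last.
  std::priority_queue<unsigned, std::vector<unsigned>, NodeOrder<false> > TopQ;
  // Greater-than comparator: top() is the lowest node number instead.
  std::priority_queue<unsigned, std::vector<unsigned>, NodeOrder<true> > BottomQ;
  for (unsigned N = 0; N != 4; ++N) {
    TopQ.push(N);
    BottomQ.push(N);
  }
  std::printf("TopQ.top() = %u, BottomQ.top() = %u\n", TopQ.top(), BottomQ.top());
  return 0;  // prints "TopQ.top() = 3, BottomQ.top() = 0"
}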
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 29cfb49953b9..1ce546b578ad 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -32,7 +32,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static cl::opt<bool>
+static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
@@ -90,12 +90,19 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
-FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
-
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
if (!MI->isCopy())
@@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+ // Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
return true;
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
-
// BreakPHIEdge is true if all the uses are in the successor MBB being sunken
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
@@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)))
return true;
- if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
return true;
// MI is cheap, we probably don't want to break the critical edge for it.
@@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
-/// collectDebgValues - Scan instructions following MI and collect any
+/// collectDebugValues - Scan instructions following MI and collect any
/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr *MI,
+static void collectDebugValues(MachineInstr *MI,
SmallVector<MachineInstr *, 2> & DbgValues) {
DbgValues.clear();
if (!MI->getOperand(0).isReg())
@@ -401,35 +402,76 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// SinkInstruction - Determine whether it is safe to sink the specified machine
-/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+/// isPostDominatedBy - Return true if A is post dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
return false;
- // Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
return false;
- // FIXME: This should include support for sinking instructions within the
- // block they are currently in to shorten the live ranges. We often get
- // instructions sunk into the top of a large block, but it would be better to
- // also sink them down before their first use in the block. This xform has to
- // be careful not to *increase* register pressure though, e.g. sinking
- // "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z and only shrink the live range of x.
+  // Sinking is profitable if SuccToSinkTo does not post dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+  // Check whether the only uses in the post-dominated block are PHI instructions.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+  // Even if SuccToSinkTo post dominates the current block, sinking may still be
+  // profitable if MI can be sunk further into another block in a later round.
+ bool BreakPHIEdge = false;
+  // FIXME - If finding the successor is expensive at compile time, cache the results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+  // If SuccToSinkTo is the final destination and it post dominates the current
+  // block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
- MachineBasicBlock *ParentBlock = MI->getParent();
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
- bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -442,24 +484,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
-
- if (AllocatableSet.test(Reg))
- return false;
-
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
-
- if (AllocatableSet.test(AliasReg))
- return false;
- }
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return NULL;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return false;
+ return NULL;
}
} else {
// Virtual register uses are always safe to sink.
@@ -467,7 +496,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return false;
+ return NULL;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -488,48 +517,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return false;
+ return NULL;
continue;
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
- E = ParentBlock->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
- SuccToSinkTo = *SI;
+ SuccToSinkTo = SuccBlock;
break;
}
if (LocalUse)
// Def is used locally, it's never safe to move this def.
- return false;
+ return NULL;
}
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
- return false;
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
}
}
- // If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
- return false;
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
return false;
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (MI->getParent() == SuccToSinkTo)
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+  // If we couldn't find a block to sink to, give up.
+ if (SuccToSinkTo == 0)
+ return false;
+
+
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
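The FIXME'd isPostDominatedBy helper added above recognizes only one CFG shape:
a block with exactly two successors, where one successor is B and the other
flows straight into B (an if/else triangle). A minimal standalone sketch, not
part of the imported patch, restating that test on a hypothetical Block type
(and checking both successors rather than only the first):

// Sketch only: the triangle-shaped special case behind isPostDominatedBy().
#include <cassert>
#include <cstddef>
#include <vector>

struct Block {                // hypothetical stand-in for MachineBasicBlock
  std::vector<Block*> Succs;  // successor blocks
};

// Conservatively approximate "every path leaving A reaches B": A must have
// exactly two successors, and each one must either be B or jump only to B.
static bool isPostDominatedBySketch(const Block *A, const Block *B) {
  if (A->Succs.size() != 2)
    return false;
  for (std::size_t i = 0; i != A->Succs.size(); ++i) {
    const Block *S = A->Succs[i];
    if (S == B)
      continue;
    if (S->Succs.size() != 1 || S->Succs[0] != B)
      return false;
  }
  return true;
}

int main() {
  // A branches to Then or falls through to Join; Then rejoins at Join.
  Block Join, Then, A;
  Then.Succs.push_back(&Join);
  A.Succs.push_back(&Then);
  A.Succs.push_back(&Join);
  assert(isPostDominatedBySketch(&A, &Join));   // the triangle is recognized
  assert(!isPostDominatedBySketch(&Then, &A));  // anything else is conservatively false
  return 0;
}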
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 26847d39e7ad..74ba94d1fcc0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -69,14 +70,17 @@ namespace {
unsigned foundErrors;
typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
const MachineInstr *FirstTerminator;
BitVector regsReserved;
+ BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
RegSet regsLiveInButUnused;
SlotIndex lastIndex;
@@ -85,7 +89,7 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
RV.push_back(*R);
}
@@ -175,6 +179,10 @@ namespace {
return Reg < regsReserved.size() && regsReserved.test(Reg);
}
+ bool isAllocatable(unsigned Reg) {
+ return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ }
+
// Analysis information if available
LiveVariables *LiveVars;
LiveIntervals *LiveInts;
@@ -194,6 +202,7 @@ namespace {
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
void checkPHIOps(const MachineBasicBlock *MBB);
@@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
- for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
- MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
*OS << "Instruction: " << *MBBI;
continue;
}
+ // Skip BUNDLE instruction for now. FIXME: We should add code to verify
+      // the BUNDLEs specifically.
+ if (MBBI->isBundle())
+ continue;
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
regsDefined.clear();
regsDead.clear();
regsKilled.clear();
+ regMasks.clear();
regsLiveInButUnused.clear();
MBBInfoMap.clear();
@@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
MF->print(*OS, Indexes);
}
*OS << "*** Bad machine code: " << msg << " ***\n"
- << "- function: " << MF->getFunction()->getNameStr() << "\n";
+ << "- function: " << MF->getFunction()->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
@@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() {
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
Reg = regsReserved.find_next(Reg)) {
- for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
// FIXME: This should probably be:
// assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
regsReserved.set(*Sub);
}
}
+
+ regsAllocatable = TRI->getAllocatableSet(*MF);
+
markReachable(&MF->front());
}
@@ -393,6 +410,20 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = 0;
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end();
+ LI != LE; ++LI) {
+ unsigned reg = *LI;
+ if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ MBB != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
// Count the number of landing pad successors.
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
@@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().getDesc().isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
continue;
}
regsLive.insert(*I);
- for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++)
regsLive.insert(*R);
}
regsLiveInButUnused = regsLive;
@@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
BitVector PR = MFI->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
regsLive.insert(I);
- for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++)
regsLive.insert(*R);
}
@@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !MCID.mayLoad())
+ if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MCID.mayStore())
+ if ((*I)->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
// Debug values must not have a slot index.
- // Other instructions must have one.
+ // Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
bool mapped = !LiveInts->isNotInMIMap(MI);
if (MI->isDebugValue()) {
if (mapped)
report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
} else {
if (!mapped)
report("Missing slot index", MI);
@@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- if (MCID.isTerminator()) {
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
- !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
@@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const unsigned Reg = MO->getReg();
if (!Reg)
return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
- // Check Live Variables.
- if (MI->isDebugValue()) {
- // Liveness checks are not valid for debug values.
- } else if (MO->isUse() && !MO->isUndef()) {
- regsLiveInButUnused.erase(Reg);
-
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical",
- MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
- addRegWithSubRegs(regsKilled, Reg);
-
- // Check that LiveVars knows this kill.
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
- MO->isKill()) {
- LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(),
- VI.Kills.end(), MI) == VI.Kills.end())
- report("Kill missing from LiveVariables", MO, MONum);
- }
-
- // Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
- }
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
-
- // Use of a dead register.
- if (!regsLive.count(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- // Reserved registers may be used even when 'dead'.
- if (!isReserved(Reg))
- report("Using an undefined physical register", MO, MONum);
- } else {
- BBInfo &MInfo = MBBInfoMap[MI->getParent()];
- // We don't know which virtual registers are live in, so only complain
- // if vreg was killed in this MBB. Otherwise keep track of vregs that
- // must be live in. PHI instructions are handled separately.
- if (MInfo.regsKilled.count(Reg))
- report("Using a killed virtual register", MO, MONum);
- else if (!MI->isPHI())
- MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
- }
- }
- } else if (MO->isDef()) {
- // Register defined.
- // TODO: verify that earlyclobber ops are not used.
- if (MO->isDead())
- addRegWithSubRegs(regsDead, Reg);
- else
- addRegWithSubRegs(regsDefined, Reg);
-
- // Verify SSA form.
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
- llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
- report("Multiple virtual register defs in SSA form", MO, MONum);
-
- // Check LiveInts for a live range, but only for virtual registers.
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
- report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << VNI->id << " is not defined at "
- << DefIdx << " in " << LI << '\n';
- }
- } else {
- report("No live range at def", MO, MONum);
- *OS << DefIdx << " is not live in " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
- }
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
break;
}
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
case MachineOperand::MO_MachineBasicBlock:
if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
report("PHI operand is not in the CFG", MO, MONum);
@@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
@@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical", MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+}
+
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
@@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() {
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() {
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->isPHI(); ++BBI) {
- DenseSet<const MachineBasicBlock*> seen;
+ seen.clear();
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
@@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() {
}
// Now check liveness info if available
- if (LiveVars || LiveInts)
- calcRegsRequired();
+ calcRegsRequired();
+
+ if (MRI->isSSA() && !MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
if (LiveVars)
verifyLiveVariables();
if (LiveInts)
@@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() {
report("No instruction at def index", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
- } else if (!MI->modifiesRegister(LI.reg, TRI)) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ continue;
}
+ bool hasDef = false;
bool isEarlyClobber = false;
- if (MI) {
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
- MOI->isEarlyClobber()) {
- isEarlyClobber = true;
- break;
- }
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->regsOverlap(LI.reg, MOI->getReg()))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
}
// Early clobber defs begin at USE slots, but other defs must begin at
// DEF slots.
if (isEarlyClobber) {
- if (!VNI->def.isUse()) {
- report("Early clobber def must be at a USE slot", MF);
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
- } else if (!VNI->def.isDef()) {
- report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
@@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() {
*OS << " in " << LI << '\n';
continue;
}
- if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
+
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ continue;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
I->print(*OS);
*OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
- !MI->readsVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- // FIXME: Should we check for each of these?
- bool hasDeadDef = false;
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
- hasDeadDef = true;
- break;
- }
- }
+ << MBBStartIdx << '\n';
+ continue;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == E || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (I->end.isDead()) {
if (!hasDeadDef) {
- report("Instruction killing live segment neither defines nor reads "
- "register", MI);
+ report("Instruction doesn't have a dead def operand", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register",
+ MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
@@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() {
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
- const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
continue;
@@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!PVNI) {
report("Register not marked live out of predecessor", *PI);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << " in " << LI << '\n';
continue;
}
diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp
deleted file mode 100644
index cf05275d7a31..000000000000
--- a/lib/CodeGen/ObjectCodeEmitter.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-
-//===----------------------------------------------------------------------===//
-// ObjectCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
-ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
-ObjectCodeEmitter::~ObjectCodeEmitter() {}
-
-/// setBinaryObject - set the BinaryObject we are writting to
-void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
-
-/// emitByte - This callback is invoked when a byte needs to be
-/// written to the data stream, without buffer overflow testing.
-void ObjectCodeEmitter::emitByte(uint8_t B) {
- BO->emitByte(B);
-}
-
-/// emitWordLE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitWordLE(uint32_t W) {
- BO->emitWordLE(W);
-}
-
-/// emitWordBE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitWordBE(uint32_t W) {
- BO->emitWordBE(W);
-}
-
-/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
- BO->emitDWordLE(W);
-}
-
-/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
- BO->emitDWordBE(W);
-}
-
-/// emitAlignment - Align 'BO' to the necessary alignment boundary.
-void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
- uint8_t fill /* 0 */) {
- BO->emitAlignment(Alignment, fill);
-}
-
-/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
- BO->emitULEB128Bytes(Value);
-}
-
-/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
- BO->emitSLEB128Bytes(Value);
-}
-
-/// emitString - This callback is invoked when a String needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitString(const std::string &String) {
- BO->emitString(String);
-}
-
-/// getCurrentPCValue - This returns the address that the next emitted byte
-/// will be output to.
-uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
- return BO->getCurrentPCOffset();
-}
-
-/// getCurrentPCOffset - Return the offset from the start of the emitted
-/// buffer that we are currently writing to.
-uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
- return BO->getCurrentPCOffset();
-}
-
-/// addRelocation - Whenever a relocatable address is needed, it should be
-/// noted with this interface.
-void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
- BO->addRelocation(relocation);
-}
-
-/// StartMachineBasicBlock - This should be called by the target when a new
-/// basic block is about to be emitted. This way the MCE knows where the
-/// start of the block is, and can implement getMachineBasicBlockAddress.
-void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
-}
-
-/// getMachineBasicBlockAddress - Return the address of the specified
-/// MachineBasicBlock, only usable after the label for the MBB has been
-/// emitted.
-uintptr_t
-ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
-}
-
-/// getJumpTableEntryAddress - Return the address of the jump table with index
-/// 'Index' in the function that last called initJumpTableInfo.
-uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
- assert(JTLocations.size() > Index && "JT not emitted!");
- return JTLocations[Index];
-}
-
-/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
- assert(CPLocations.size() > Index && "CP not emitted!");
- return CPLocations[Index];
-}
-
-/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
- assert(CPSections.size() > Index && "CP not emitted!");
- return CPSections[Index];
-}
-
-} // end namespace llvm
-
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index c05be130ec61..6da313e632af 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -56,11 +56,10 @@ namespace {
}
char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
"Optimize machine instruction PHIs", false, false)
-FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
-
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
TII = Fn.getTarget().getInstrInfo();
@@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
InstrSet PHIsInCycle;
if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
SingleValReg != 0) {
- MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
Changed = true;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 6994aa58fbd5..0ed4c34bb105 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
- "Eliminate PHI nodes for register allocation", false, false)
-
char& llvm::PHIEliminationID = PHIElimination::ID;
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
@@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode(
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
// Increment use count of the newly created virtual register.
- VI.NumUses++;
LV->setPHIJoin(IncomingReg);
// When we are reusing the incoming register, it may already have been
@@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return false; // Quick exit for basic blocks without PHIs.
bool Changed = false;
- for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 315aedddb9ef..53d1fcf7377a 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,62 +12,617 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> EnableBlockPlacement("enable-block-placement",
+ cl::Hidden, cl::desc("Enable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault>
+OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<cl::boolOrDefault>
+EnableMachineSched("enable-misched", cl::Hidden,
+ cl::desc("Enable the machine instruction scheduling pass."));
+static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
+ cl::desc("Use strong PHI elimination."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
+static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+ if (Override)
+ return &NoPassID;
+ return ID;
+}
+
+/// Allow Pass selection to be overridden by command line options. This supports
+/// flags with ternary conditions. TargetID is passed through by default. The
+/// pass is suppressed when the option is false. When the option is true, the
+/// StandardID is selected if the target provides no default.
+static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
+ AnalysisID StandardID) {
+ switch (Override) {
+ case cl::BOU_UNSET:
+ return TargetID;
+ case cl::BOU_TRUE:
+ if (TargetID != &NoPassID)
+ return TargetID;
+ if (StandardID == &NoPassID)
+ report_fatal_error("Target cannot enable pass");
+ return StandardID;
+ case cl::BOU_FALSE:
+ return &NoPassID;
+ }
+ llvm_unreachable("Invalid command line option state");
+}
+
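// Sketch only (not part of the imported patch): a plain-int model of the
// applyOverride() decision table above, so the three states of a
// cl::boolOrDefault option such as -enable-misched are easy to read off.
// NoPass, StandardPass, TargetPass and applyOverrideModel are hypothetical
// names; the real code additionally reports a fatal error when the option is
// true but no standard pass exists.
#include <cassert>

namespace {
enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };
const int NoPass = 0, StandardPass = 1, TargetPass = 2;

int applyOverrideModel(int TargetID, BoolOrDefault Override, int StandardID) {
  switch (Override) {
  case BOU_UNSET:  // option not given: keep whatever the target configured
    return TargetID;
  case BOU_TRUE:   // option forced on: target default wins, else the standard pass
    return TargetID != NoPass ? TargetID : StandardID;
  case BOU_FALSE:  // option forced off: run no pass at all
    return NoPass;
  }
  return NoPass;
}
} // namespace

int main() {
  assert(applyOverrideModel(NoPass,     BOU_UNSET, StandardPass) == NoPass);
  assert(applyOverrideModel(NoPass,     BOU_TRUE,  StandardPass) == StandardPass);
  assert(applyOverrideModel(TargetPass, BOU_TRUE,  StandardPass) == TargetPass);
  assert(applyOverrideModel(TargetPass, BOU_FALSE, StandardPass) == NoPass);
  return 0;
}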
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the ID of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &CodePlacementOptID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+static char NoPassIDAnchor = 0;
+char &llvm::NoPassID = NoPassIDAnchor;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overriding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting NoPass, and the user may still enable that standard
+ // pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(EarlyTailDuplicateID, TailDuplicateID);
+ substitutePass(PostRAMachineLICMID, MachineLICMID);
+
+ // Temporarily disable experimental passes.
+ substitutePass(MachineSchedulerID, NoPassID);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(*(PassManagerBase*)0) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
+ Impl->TargetPasses[&StandardID] = &TargetID;
+}
+
+AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, AnalysisID>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
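
To make the substitution map concrete, the following hedged sketch shows how a hypothetical target could plug into this mechanism; MyTargetMachine and MyTargetPassConfig are assumed names, and the only interfaces used are the TargetPassConfig constructor, substitutePass, and the createPassConfig hook introduced in this patch.

// Hypothetical target code, for illustration only (not part of this patch).
namespace {
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Suppress the standard MachineLICM pass for this target by default;
    // addPass(MachineLICMID) will then resolve to NoPassID through
    // getPassSubstitution() above.
    substitutePass(MachineLICMID, NoPassID);
  }
};
} // namespace

// The target machine (an assumed class) returns the subclass via the hook.
TargetPassConfig *MyTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new MyTargetPassConfig(this, PM);
}
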
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(char &ID) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ AnalysisID TargetID = getPassSubstitution(&ID);
+ AnalysisID FinalID = overridePass(&ID, TargetID);
+ if (FinalID == &NoPassID)
+ return FinalID;
+
+ Pass *P = Pass::createPass(FinalID);
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ PM.add(P);
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) const {
+ if (TM->shouldPrintMachineCode())
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel();
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
///
-/// RegisterRegAlloc class - Track the registration of register allocators.
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ addPass(ExpandISelPseudosID);
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ }
+ else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc())
+ printAndVerify("After PreRegAlloc passes");
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc())
+ printAndVerify("After PostRegAlloc passes");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ addPass(PrologEpilogCodeInserterID);
+ printAndVerify("After PrologEpilogCodeInserter");
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(ExpandPostRAPseudosID);
+ printAndVerify("After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2())
+ printAndVerify("After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(PostRASchedulerID);
+ printAndVerify("After PostRAScheduler");
+ }
+
+ // GC
+ addPass(GCMachineCodeAnalysisID);
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ if (addPreEmitPass())
+ printAndVerify("After PreEmit passes");
+}
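
As a concrete and deliberately minimal illustration of the addPre/Post hooks described above, a target override might look like the sketch below; MyTargetPassConfig and createMyTargetBranchFixupPass are hypothetical names, not part of this patch.

// Hypothetical target hook, for illustration only. Returning true tells
// addMachinePasses() above to call printAndVerify() after the injected pass.
bool MyTargetPassConfig::addPreEmitPass() {
  PM.add(createMyTargetBranchFixupPass()); // assumed target-specific factory
  return true;
}
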
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(EarlyTailDuplicateID) != &NoPassID)
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(OptimizePHIsID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ addPass(MachineLICMID);
+ addPass(MachineCSEID);
+ addPass(MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
MachinePassRegistry RegisterRegAlloc::Registry;
-static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static FunctionPass *useDefaultRegisterAllocator() { return 0; }
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
- createDefaultRegisterAllocator);
+ useDefaultRegisterAllocator);
-//===---------------------------------------------------------------------===//
-///
-/// RegAlloc command line options.
-///
-//===---------------------------------------------------------------------===//
+/// -regalloc=... command line option.
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
- cl::init(&createDefaultRegisterAllocator),
+ cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use"));
-//===---------------------------------------------------------------------===//
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
///
-/// createRegisterAllocator - choose the appropriate register allocator.
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
///
-//===---------------------------------------------------------------------===//
-FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry registers pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+ // Initialize the global default.
if (!Ctor) {
Ctor = RegAlloc;
RegisterRegAlloc::setDefault(RegAlloc);
}
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
- // This forces linking of the linear scan register allocator,
- // so -regalloc=linearscan still works in clang.
- if (Ctor == createLinearScanRegisterAllocator)
- return createLinearScanRegisterAllocator();
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
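
Registering an additional allocator uses the same RegisterRegAlloc machinery as the "default" entry above. The sketch below is hypothetical (the "myra" name is invented and the fast allocator is used only as a stand-in implementation), but once registered this way the allocator becomes selectable with -regalloc=myra and takes precedence over createTargetRegisterAllocator().

// Hypothetical registration, for illustration only (not part of this patch).
static FunctionPass *createMyRegAlloc() {
  // Stand-in implementation; a real allocator would construct its own pass.
  return createFastRegisterAllocator();
}

static RegisterRegAlloc
  myRegAlloc("myra", "illustrative register allocator", createMyRegAlloc);
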
- if (Ctor != createDefaultRegisterAllocator)
- return Ctor();
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(PHIEliminationID);
+ addPass(TwoAddressInstructionPassID);
- // When the 'default' allocator is requested, pick one based on OptLevel.
- switch (OptLevel) {
- case CodeGenOpt::None:
- return createFastRegisterAllocator();
- default:
- return createGreedyRegisterAllocator();
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(LiveVariablesID);
+
+ // Add passes that move from transformed SSA into conventional SSA. This is a
+ // "copy coalescing" problem.
+ //
+ if (!EnableStrongPHIElim) {
+ // Edge splitting is smarter with machine loop info.
+ addPass(MachineLoopInfoID);
+ addPass(PHIEliminationID);
+ }
+ addPass(TwoAddressInstructionPassID);
+
+ // FIXME: Either remove this pass completely, or fix it so that it works on
+ // SSA form. We could modify LiveIntervals to be independent of this pass, but
+ // it would be even better to simply eliminate *all* IMPLICIT_DEFs before
+ // leaving SSA.
+ addPass(ProcessImplicitDefsID);
+
+ if (EnableStrongPHIElim)
+ addPass(StrongPHIEliminationID);
+
+ addPass(RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(MachineSchedulerID) != &NoPassID)
+ printAndVerify("After Machine Scheduling");
+
+ // Add the selected register allocation pass.
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+
+ // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+ // but eventually, all users of it should probably be moved to addPostRA and
+ // it can go away. Currently, it's the intended place for targets to run
+ // FinalizeMachineBundles, because passes other than MachineScheduling and
+ // RegAlloc itself may not be aware of bundles.
+ if (addFinalizeRegAlloc())
+ printAndVerify("After RegAlloc finalization");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with registers when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
+
+ // Tail duplication.
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+
+ // Copy propagation.
+ if (addPass(MachineCopyPropagationID) != &NoPassID)
+ printAndVerify("After copy propagation pass");
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ AnalysisID ID = &NoPassID;
+ if (EnableBlockPlacement) {
+ // MachineBlockPlacement is an experimental pass that is currently disabled
+ // by default. It is expected to eventually subsume CodePlacementOpt, so
+ // enabling one of the two disables the other.
+ ID = addPass(MachineBlockPlacementID);
+ } else {
+ ID = addPass(CodePlacementOptID);
+ }
+ if (ID != &NoPassID) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(MachineBlockPlacementStatsID);
+
+ printAndVerify("After machine block placement.");
}
}
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index bbc7ce2d0a42..9c5c029000c0 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -39,7 +39,7 @@
// =>
// v1 = bitcast v0
// = v0
-//
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
@@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
-STATISTIC(NumImmFold, "Number of move immediate foled");
+STATISTIC(NumImmFold, "Number of move immediate folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -109,22 +109,19 @@ namespace {
}
char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-FunctionPass *llvm::createPeepholeOptimizerPass() {
- return new PeepholeOptimizer();
-}
-
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
-///
+///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
@@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (PHIBBs.count(UseMBB))
continue;
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed)
+ MRI->clearKillFlags(DstReg);
+
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVR)
@@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->getDesc().isBitcast())
+ if (!DefMI || !DefMI->isBitcast())
return false;
unsigned SrcSrc = 0;
@@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isMoveImmediate())
+ if (!MI->isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
ImmDefRegs.insert(Reg);
return true;
}
-
+
return false;
}
@@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
-
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
-
+
bool SeenMoveImm = false;
LocalMIs.clear();
ImmDefRegs.clear();
@@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
-
- if (MCID.isBitcast()) {
+ if (MI->isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
- }
- } else if (MCID.isCompare()) {
+ }
+ } else if (MI->isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index c73e87733cb4..24d3e5ab0c9d 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,6 @@
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "RegisterClassInfo.h"
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
@@ -45,7 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@@ -82,16 +81,15 @@ namespace {
AliasAnalysis *AA;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
- CodeGenOpt::Level OptLevel;
public:
static char ID;
- PostRAScheduler(CodeGenOpt::Level ol) :
- MachineFunctionPass(ID), OptLevel(ol) {}
+ PostRAScheduler() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -99,10 +97,6 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Post RA top-down list latency scheduler";
- }
-
bool runOnMachineFunction(MachineFunction &Fn);
};
char PostRAScheduler::ID = 0;
@@ -130,36 +124,49 @@ namespace {
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
- /// or ~0u if the register is not live.
- std::vector<unsigned> KillIndices;
+ /// LiveRegs - true if the register is live.
+ BitVector LiveRegs;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo&,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
~SchedulePostRATDList();
- /// StartBlock - Initialize register live-range state for scheduling in
+ /// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
- void StartBlock(MachineBasicBlock *BB);
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
/// Schedule - Schedule the instruction range using list scheduling.
///
- void Schedule();
+ void schedule();
+
+ void EmitSchedule();
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count);
- /// FinishBlock - Clean up register live-range state.
+ /// finishBlock - Clean up register live-range state.
///
- void FinishBlock();
+ void finishBlock();
/// FixupKills - Fix register kill flags that have been made
/// invalid due to scheduling
@@ -177,16 +184,23 @@ namespace {
// adjustments may be made to the instruction if necessary. Return
// true if the operand has been deleted, false if not.
bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
};
}
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
SchedulePostRATDList::SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
- KillIndices(TRI->getNumRegs())
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
@@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() {
delete AntiDepBreak;
}
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
- SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
@@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
+ CriticalPathRCs))
return false;
}
@@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
- ":BB#" << MBB->getNumber() << " ***\n";
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.StartBlock(MBB);
+ Scheduler.startBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
@@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
MachineInstr *MI = llvm::prior(I);
- if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.Run(MBB, I, Current, CurrentCount);
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count - 1;
@@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
}
I = MI;
--Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
// Clean up register live-range state.
- Scheduler.FinishBlock();
+ Scheduler.finishBlock();
// Update register kills
Scheduler.FixupKills(MBB);
@@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
-void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Call the superclass.
- ScheduleDAGInstrs::StartBlock(BB);
+ ScheduleDAGInstrs::startBlock(BB);
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
@@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
/// Schedule - Schedule the instruction range using list scheduling.
///
-void SchedulePostRATDList::Schedule() {
+void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
- BuildSchedGraph(AA);
+ buildSchedGraph(AA);
if (AntiDepBreak != NULL) {
unsigned Broken =
- AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
- InsertPosIndex, DbgValues);
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() {
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- SUnits.clear();
- Sequence.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
NumFixedAnti += Broken;
}
@@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() {
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
if (AntiDepBreak != NULL)
- AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+ AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// FinishBlock - Clean up register live-range state.
///
-void SchedulePostRATDList::FinishBlock() {
+void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak != NULL)
AntiDepBreak->FinishBlock();
// Call the superclass.
- ScheduleDAGInstrs::FinishBlock();
+ ScheduleDAGInstrs::finishBlock();
}
/// StartBlockForKills - Initialize register live-range state for updating kills
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
- // Initialize the indices to indicate that no registers are live.
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
- KillIndices[i] = ~0u;
+ // Start with no live registers.
+ LiveRegs.reset();
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
else {
@@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
}
// If MO itself is live, clear the kill flag...
- if (KillIndices[MO.getReg()] != ~0u) {
+ if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
return false;
}
@@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
MI->addOperand(MachineOperand::CreateReg(*Subreg,
true /*IsDef*/,
true /*IsImp*/,
@@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
- std::set<unsigned> killedRegs;
+ BitVector killedRegs(TRI->getNumRegs());
BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// are completely defined.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
- KillIndices[Reg] = ~0u;
+ LiveRegs.reset(Reg);
// Repeat for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = ~0u;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.reset(*Subreg);
}
// Examine all used registers and set/clear kill flag. When a
// register is used multiple times we only set the kill flag on
// the first use.
- killedRegs.clear();
+ killedRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
@@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
bool kill = false;
- if (killedRegs.find(Reg) == killedRegs.end()) {
+ if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
kill = false;
break;
}
@@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// If subreg is not live, then register is killed if it became
// live in this instruction
if (kill)
- kill = (KillIndices[Reg] == ~0u);
+ kill = !LiveRegs.test(Reg);
}
if (MO.isKill() != kill) {
@@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(MI->dump());
}
- killedRegs.insert(Reg);
+ killedRegs.set(Reg);
}
// Mark any used register (that is not using undef) and subregs as
@@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
- KillIndices[Reg] = Count;
+ LiveRegs.set(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = Count;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
ReleaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue.ScheduledNode(SU);
+ AvailableQueue.scheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
@@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
-#endif
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
}
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
-FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
- return new PostRAScheduler(OptLevel);
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b1d8c9760225..1ad3479afb4c 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,6 +26,8 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
@@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
AU.addPreservedID(TwoAddressInstructionPassID);
@@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
SmallSet<unsigned, 8> &ImpDefRegs) {
switch(OpIdx) {
case 1:
- return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
case 2:
- return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
default: return false;
}
@@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
MachineOperand &MO1 = MI->getOperand(1);
if (MO1.getReg() != Reg)
return false;
- if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
return true;
return false;
}
@@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
TII = fn.getTarget().getInstrInfo();
TRI = fn.getTarget().getRegisterInfo();
MRI = &fn.getRegInfo();
- LV = &getAnalysis<LiveVariables>();
+ LV = getAnalysisIfAvailable<LiveVariables>();
SmallSet<unsigned, 8> ImpDefRegs;
SmallVector<MachineInstr*, 8> ImpDefMIs;
@@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *MI = &*I;
++I;
if (MI->isImplicitDef()) {
- if (MI->getOperand(0).getSubReg())
+ ImpDefMIs.push_back(MI);
+ // Is this a sub-register read-modify-write?
+ if (MI->getOperand(0).readsReg())
continue;
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
ImpDefRegs.insert(*SS);
}
- ImpDefMIs.push_back(MI);
continue;
}
// Eliminate %reg1032:sub<def> = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ if (MI->isCopy() && MI->getOperand(0).readsReg()) {
MachineOperand &MO = MI->getOperand(1);
if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (MO.isKill()) {
+ if (LV && MO.isKill()) {
LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
vi.removeKill(MI);
}
@@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+ if (!MO.isReg() || !MO.readsReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MI->RemoveOperand(j);
if (isKill) {
ImpDefRegs.erase(Reg);
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(MI);
+ if (LV) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
}
ChangedToImpDef = true;
Changed = true;
@@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
- // Make sure other uses of
+ // Make sure other reads of Reg are also marked <undef>.
for (unsigned j = i+1; j != e; ++j) {
MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
MOJ.setIsUndef();
}
ImpDefRegs.erase(Reg);
@@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
}
// Update LiveVariables varinfo if the instruction is a kill.
- if (isKill) {
+ if (LV && isKill) {
LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
vi.removeKill(RMI);
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32c932552bed..458915ea5d93 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -45,24 +45,22 @@
using namespace llvm;
char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
"Prologue/Epilogue Insertion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false)
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
-/// createPrologEpilogCodeInserter - This function returns a pass that inserts
-/// prolog and epilog code, and eliminates abstract frame references.
-///
-FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
-
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
@@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
@@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+
delete RS;
clearAllSets();
return true;
@@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
@@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
- } else {
- for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
- *AliasSet; ++AliasSet) { // Check alias registers too.
- if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
- CSI.push_back(CalleeSavedInfo(Reg));
- break;
- }
- }
}
}
@@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
bool AtStart = I == MBB->begin();
@@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the
// return sequence.
- if (! I->getDesc().isTerminator()) {
+ if (! I->isTerminator()) {
++I;
} else {
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
}
}
@@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn())
+ if (!I->empty() && I->back().isReturn())
TFI.emitEpilogue(Fn, *I);
}
@@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (EnableSegmentedStacks)
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
}
@@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch register.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index e2391591ad06..0d140a9bb481 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -40,10 +40,6 @@ namespace llvm {
initializePEIPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
- return "Prolog/Epilog Insertion & Frame Finalization";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 73b66d868f3d..49599b3ab980 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
this == getJumpTable())
return true;
llvm_unreachable("Unknown PseudoSourceValue!");
- return false;
}
bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
@@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
this == getJumpTable())
return false;
llvm_unreachable("Unknown PseudoSourceValue!");
- return true;
}
bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..b00eceb17f11
--- /dev/null
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,280 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg.
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
+ << PrintReg(SI.value()->reg, TRI));
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ DEBUG(dbgs() << '\n');
+ }
+}
+
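The allocatePhysRegs() driver above only manages the queue: it dequeues the highest-priority live range, asks the subclass's selectOrSplit() for a physical register, and re-enqueues any new intervals produced by splitting or spilling. As a rough, standalone sketch of that queue-driven pattern (all types, register counts, and weights below are invented for illustration, this is not the LLVM API, and highest-spill-weight-first is just one possible priority order):

// Toy model of the dequeue / assign / re-enqueue loop. Everything here is
// an illustrative stand-in, not an LLVM class.
#include <cstdio>
#include <queue>
#include <vector>

struct ToyVirtReg { unsigned id; float spillWeight; };

struct ByWeight {
  bool operator()(const ToyVirtReg *a, const ToyVirtReg *b) const {
    return a->spillWeight < b->spillWeight; // heaviest live range pops first
  }
};

int main() {
  // Three hypothetical live ranges competing for two physical registers.
  std::vector<ToyVirtReg> vregs = {{1, 2.0f}, {2, 5.0f}, {3, 1.0f}};
  std::priority_queue<ToyVirtReg*, std::vector<ToyVirtReg*>, ByWeight> queue;
  for (ToyVirtReg &v : vregs)
    queue.push(&v);

  std::vector<bool> physUsed(2, false); // crude interference: one vreg per physreg
  while (!queue.empty()) {
    ToyVirtReg *v = queue.top();
    queue.pop();
    bool assigned = false;
    for (unsigned p = 0; p != physUsed.size(); ++p) {
      if (!physUsed[p]) {
        physUsed[p] = true;
        std::printf("vreg %u -> physreg %u\n", v->id, p);
        assigned = true;
        break;
      }
    }
    if (!assigned)
      std::printf("vreg %u spilled\n", v->id); // a real allocator would split or spill here
  }
  return 0;
}

A real selectOrSplit() also has to model interference with already-assigned ranges and may return new, smaller intervals instead of a register; the sketch only shows the driver's control flow.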
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 031642117efc..072fe2bdb656 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -49,11 +49,6 @@ class VirtRegMap;
class LiveIntervals;
class Spiller;
-// Forward declare a priority queue of live virtual registers. If an
-// implementation needs to prioritize by anything other than spill weight, then
-// this will become an abstract base class with virtual calls to push/get.
-class LiveVirtRegQueue;
-
/// RegAllocBase provides the register allocation driver and interface that can
/// be extended to add interesting heuristics.
///
@@ -67,7 +62,6 @@ class RegAllocBase {
// registers may have changed.
unsigned UserTag;
-protected:
// Array of LiveIntervalUnions indexed by physical register.
class LiveUnionArray {
unsigned NumRegs;
@@ -88,17 +82,19 @@ protected:
}
};
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- VirtRegMap *VRM;
- LiveIntervals *LIS;
- RegisterClassInfo RegClassInfo;
LiveUnionArray PhysReg2LiveUnion;
// Current queries, one per physreg. They must be reinitialized each time we
// query on a new live virtual register.
OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+
RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
virtual ~RegAllocBase() {}
@@ -115,16 +111,17 @@ protected:
return Queries[PhysReg];
}
+ // Get direct access to the underlying LiveIntervalUnion for PhysReg.
+ LiveIntervalUnion &getLiveUnion(unsigned PhysReg) {
+ return PhysReg2LiveUnion[PhysReg];
+ }
+
// Invalidate all cached information about virtual registers - live ranges may
// have changed.
void invalidateVirtRegs() { ++UserTag; }
// The top-level driver. The output is a VirtRegMap that is updated with
// physical register assignments.
- //
- // If an implementation wants to override the LiveInterval comparator, we
- // should modify this interface to allow passing in an instance derived from
- // LiveVirtRegQueue.
void allocatePhysRegs();
// Get a temporary reference to a Spiller instance.
@@ -160,12 +157,6 @@ protected:
/// allocation is making progress.
void unassign(LiveInterval &VirtReg, unsigned PhysReg);
- // Helper for spilling all live virtual registers currently unified under preg
- // that interfere with the most recently queried lvr. Return true if spilling
- // was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
-
/// addMBBLiveIns - Add physreg liveins to basic blocks.
void addMBBLiveIns(MachineFunction *);
@@ -183,9 +174,6 @@ public:
private:
void seedLiveRegs();
-
- void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
};
} // end namespace llvm
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 5496d69fd3df..77ee3148f31a 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,18 +15,15 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
-#include "LiveIntervalUnion.h"
-#include "LiveRangeEdit.h"
#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -37,35 +34,17 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SparseBitVector.h"
-#endif
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
#include <cstdlib>
#include <queue>
using namespace llvm;
-STATISTIC(NumAssigned , "Number of registers assigned");
-STATISTIC(NumUnassigned , "Number of registers unassigned");
-STATISTIC(NumNewQueued , "Number of new live ranges queued");
-
static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
createBasicRegisterAllocator);
-// Temporary verification option until we can put verification inside
-// MachineVerifier.
-static cl::opt<bool, true>
-VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
- cl::desc("Verify during register allocation"));
-
-const char *RegAllocBase::TimerGroupName = "Register Allocation";
-bool RegAllocBase::VerifyEnabled = false;
-
namespace {
struct CompSpillWeight {
bool operator()(LiveInterval *A, LiveInterval *B) const {
@@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
public:
RABasic();
@@ -128,6 +112,15 @@ public:
/// Perform register allocation.
virtual bool runOnMachineFunction(MachineFunction &mf);
+ // Helper for spilling all live virtual registers currently unified under
+ // PhysReg that interfere with VirtReg. Return true if spilling was
+ // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
static char ID;
};
@@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -178,204 +168,10 @@ void RABasic::releaseMemory() {
RegAllocBase::releaseMemory();
}
-#ifndef NDEBUG
-// Verify each LiveIntervalUnion.
-void RegAllocBase::verify() {
- LiveVirtRegBitSet VisitedVRegs;
- OwningArrayPtr<LiveVirtRegBitSet>
- unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
-
- // Verify disjoint unions.
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
- LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
- PhysReg2LiveUnion[PhysReg].verify(VRegs);
- // Union + intersection test could be done efficiently in one pass, but
- // don't add a method to SparseBitVector unless we really need it.
- assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
- VisitedVRegs |= VRegs;
- }
-
- // Verify vreg coverage.
- for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
- liItr != liEnd; ++liItr) {
- unsigned reg = liItr->first;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
- if (!VRM->hasPhys(reg)) continue; // spilled?
- unsigned PhysReg = VRM->getPhys(reg);
- if (!unionVRegs[PhysReg].test(reg)) {
- dbgs() << "LiveVirtReg " << reg << " not in union " <<
- TRI->getName(PhysReg) << "\n";
- llvm_unreachable("unallocated live vreg");
- }
- }
- // FIXME: I'm not sure how to verify spilled intervals.
-}
-#endif //!NDEBUG
-
-//===----------------------------------------------------------------------===//
-// RegAllocBase Implementation
-//===----------------------------------------------------------------------===//
-
-// Instantiate a LiveIntervalUnion for each physical register.
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
- unsigned NRegs) {
- NumRegs = NRegs;
- Array =
- static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
- for (unsigned r = 0; r != NRegs; ++r)
- new(Array + r) LiveIntervalUnion(r, allocator);
-}
-
-void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
- NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
- TRI = &vrm.getTargetRegInfo();
- MRI = &vrm.getRegInfo();
- VRM = &vrm;
- LIS = &lis;
- RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
-
- const unsigned NumRegs = TRI->getNumRegs();
- if (NumRegs != PhysReg2LiveUnion.numRegs()) {
- PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
- // Cache an interferece query for each physical reg
- Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
- }
-}
-
-void RegAllocBase::LiveUnionArray::clear() {
- if (!Array)
- return;
- for (unsigned r = 0; r != NumRegs; ++r)
- Array[r].~LiveIntervalUnion();
- free(Array);
- NumRegs = 0;
- Array = 0;
-}
-
-void RegAllocBase::releaseMemory() {
- for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
- PhysReg2LiveUnion[r].clear();
-}
-
-// Visit all the live registers. If they are already assigned to a physical
-// register, unify them with the corresponding LiveIntervalUnion, otherwise push
-// them on the priority queue for later assignment.
-void RegAllocBase::seedLiveRegs() {
- NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
- unsigned RegNum = I->first;
- LiveInterval &VirtReg = *I->second;
- if (TargetRegisterInfo::isPhysicalRegister(RegNum))
- PhysReg2LiveUnion[RegNum].unify(VirtReg);
- else
- enqueue(&VirtReg);
- }
-}
-
-void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << '\n');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
- PhysReg2LiveUnion[PhysReg].unify(VirtReg);
- ++NumAssigned;
-}
-
-void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << '\n');
- assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
- PhysReg2LiveUnion[PhysReg].extract(VirtReg);
- VRM->clearVirt(VirtReg.reg);
- ++NumUnassigned;
-}
-
-// Top-level driver to manage the queue of unassigned VirtRegs and call the
-// selectOrSplit implementation.
-void RegAllocBase::allocatePhysRegs() {
- seedLiveRegs();
-
- // Continue assigning vregs one at a time to available physical registers.
- while (LiveInterval *VirtReg = dequeue()) {
- assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
-
- // Unused registers can appear when the spiller coalesces snippets.
- if (MRI->reg_nodbg_empty(VirtReg->reg)) {
- DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
- LIS->removeInterval(VirtReg->reg);
- continue;
- }
-
- // Invalidate all interference queries, live ranges could have changed.
- invalidateVirtRegs();
-
- // selectOrSplit requests the allocator to return an available physical
- // register if possible and populate a list of new live intervals that
- // result from splitting.
- DEBUG(dbgs() << "\nselectOrSplit "
- << MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << *VirtReg << '\n');
- typedef SmallVector<LiveInterval*, 4> VirtRegVec;
- VirtRegVec SplitVRegs;
- unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
-
- if (AvailablePhysReg == ~0u) {
- // selectOrSplit failed to find a register!
- const char *Msg = "ran out of registers during register allocation";
- // Probably caused by an inline asm.
- MachineInstr *MI;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
- (MI = I.skipInstruction());)
- if (MI->isInlineAsm())
- break;
- if (MI)
- MI->emitError(Msg);
- else
- report_fatal_error(Msg);
- // Keep going after reporting the error.
- VRM->assignVirt2Phys(VirtReg->reg,
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
- continue;
- }
-
- if (AvailablePhysReg)
- assign(*VirtReg, AvailablePhysReg);
-
- for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
- I != E; ++I) {
- LiveInterval *SplitVirtReg = *I;
- assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
- if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
- DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
- LIS->removeInterval(SplitVirtReg->reg);
- continue;
- }
- DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
- "expect split value in virtual register");
- enqueue(SplitVirtReg);
- ++NumNewQueued;
- }
- }
-}
-
-// Check if this live virtual register interferes with a physical register. If
-// not, then check for interference on each register that aliases with the
-// physical register. Return the interfering register.
-unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- if (query(VirtReg, *AliasI).checkInterference())
- return *AliasI;
- return 0;
-}
-
-// Helper for spillInteferences() that spills all interfering vregs currently
+// Helper for spillInterferences() that spills all interfering vregs currently
// assigned to this physical register.
-void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
assert(Q.seenAllInterferences() && "need collectInterferences()");
const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
@@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
unassign(SpilledVReg, PhysReg);
// Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
}
// After extracting segments, the query's results are invalid. But keep the
@@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool
-RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
unsigned NumInterferences = 0;
// Collect interferences assigned to any alias of the physical register.
- for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
NumInterferences += QAlias.collectInterferingVRegs();
if (QAlias.seenUnspillableVReg()) {
@@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
assert(NumInterferences > 0 && "expect interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
spillReg(VirtReg, *AliasI, SplitVRegs);
return true;
}
-// Add newly allocated physical registers to the MBB live in sets.
-void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
- NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- if (MF->size() <= 1)
- return;
-
- LiveIntervalUnion::SegmentIter SI;
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
- if (LiveUnion.empty())
- continue;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
- MachineFunction::iterator MBB = llvm::next(MF->begin());
- MachineFunction::iterator MFE = MF->end();
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.setMap(LiveUnion.getMap());
- SI.find(Start);
- while (SI.valid()) {
- if (SI.start() <= Start) {
- if (!MBB->isLiveIn(PhysReg))
- MBB->addLiveIn(PhysReg);
- DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
- << PrintReg(SI.value()->reg, TRI));
- } else if (SI.start() > Stop)
- MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
- if (++MBB == MFE)
- break;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.advanceTo(Start);
- }
- DEBUG(dbgs() << '\n');
- }
-}
-
-
-//===----------------------------------------------------------------------===//
-// RABasic Implementation
-//===----------------------------------------------------------------------===//
-
// Driver for the register assignment and splitting heuristics.
// Manages iteration over the LiveIntervalUnions.
//
@@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Check for register mask interference. When live ranges cross calls, the
+ // set of usable registers is reduced to the callee-saved ones.
+ bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs);
+
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
@@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
++I) {
unsigned PhysReg = *I;
+ // If PhysReg is clobbered by a register mask, it isn't useful for
+ // allocation or spilling.
+ if (CrossRegMasks && !UsableRegs.test(PhysReg))
+ continue;
+
// Check interference and, as a side effect, initialize queries for this
// VirtReg and its aliases.
unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
@@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
// Found an available register.
return PhysReg;
}
- Queries[interfReg].collectInterferingVRegs(1);
- LiveInterval *interferingVirtReg =
- Queries[interfReg].interferingVRegs().front();
+ LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg);
+ IntfQ.collectInterferingVRegs(1);
+ LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front();
// The current VirtReg must either be spillable, or one of its interferences
// must have less spill weight.
@@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
// Write out new DBG_VALUE instructions.
getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
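The new selectOrSplit() above first calls LIS->checkRegMaskInterference(VirtReg, UsableRegs) and then, whenever the live range crosses a register mask (typically a call), skips every candidate whose bit is clear in UsableRegs. A minimal standalone illustration of that filtering step follows; the eight-register target and the clobber pattern are invented for the example, not real target registers:

// Toy candidate filtering against a usable-register mask. The register
// numbering and call-clobber set are made up for illustration.
#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  // Bit i set means register i survives calls; here r0-r3 are call-clobbered.
  std::bitset<8> usableAcrossCall("11110000");
  bool crossesCall = true; // pretend the interference check reported a crossing
  std::vector<unsigned> allocationOrder = {0, 1, 4, 5, 2, 6};

  for (unsigned reg : allocationOrder) {
    if (crossesCall && !usableAcrossCall.test(reg))
      continue; // clobbered by the call's register mask, not a useful candidate
    std::printf("first usable candidate: r%u\n", reg);
    break;
  }
  return 0;
}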
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index b36a445291b7..e09b7f8d26be 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -49,10 +50,7 @@ namespace {
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ isBulkSpilling(false) {}
private:
const TargetMachine *TM;
MachineFunction *MF;
@@ -71,16 +69,20 @@ namespace {
// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
unsigned PhysReg; // Currently held here.
unsigned short LastOpNum; // OpNum on LastUse.
bool Dirty; // Register needs spill.
- LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
- Dirty(false) {}
+ explicit LiveReg(unsigned v)
+ : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+
+ unsigned getSparseSetKey() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
};
- typedef DenseMap<unsigned, LiveReg> LiveRegMap;
- typedef LiveRegMap::value_type LiveRegEntry;
+ typedef SparseSet<LiveReg> LiveRegMap;
// LiveVirtRegs - This map contains entries for each virtual register
// that is currently available in a physical register.
@@ -137,8 +139,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,14 +159,23 @@ namespace {
void usePhysReg(MachineOperand&);
void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
unsigned calcSpillCost(unsigned PhysReg) const;
- void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
- void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ unsigned Hint);
LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
void spillAll(MachineInstr *MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ void addRetOperands(MachineBasicBlock *MBB);
};
char RAFast::ID = 0;
}
@@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) {
/// killVirtReg - Mark virtreg as no longer available.
void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(LRI->second);
- const LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
- PhysRegState[LR.PhysReg] = regFree;
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
// Erase from LiveVirtRegs unless we're spilling in bulk.
if (!isBulkSpilling)
LiveVirtRegs.erase(LRI);
@@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
void RAFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end())
killVirtReg(LRI);
}
@@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) {
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
spillVirtReg(MI, LRI);
}
@@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
/// spillVirtReg - Do the actual work of spilling.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveRegMap::iterator LRI) {
- LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
<< " in " << PrintReg(LR.PhysReg, TRI));
- const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
- int FI = getStackSpaceFor(LRI->first, RC);
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
@@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ SmallVector<MachineInstr *, 4> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr =
@@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
}
}
- // Now this register is spilled there is should not be any DBG_VALUE pointing
- // to this register because they are all pointing to spilled value now.
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to this register because they are all pointing to the spilled
+ // value now.
LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
@@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
}
// Maybe a superregister is reserved?
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (PhysRegState[Alias]) {
case regDisabled:
@@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
<< PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
- default:
- return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
}
// This is a disabled register, add up cost of aliases.
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
if (UsedInInstr.test(Alias))
return spillImpossible;
@@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
break;
case regReserved:
return spillImpossible;
- default:
- Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
break;
}
+ }
}
return Cost;
}
@@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
/// that PhysReg is the proper container for VirtReg now. The physical
/// register must not be used for anything else when this is called.
///
-void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
<< PrintReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LRE.first;
- assert(!LRE.second.PhysReg && "Already assigned a physreg");
- LRE.second.PhysReg = PhysReg;
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
}
/// allocVirtReg - Allocate a physical register for VirtReg.
-void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
- const unsigned VirtReg = LRE.first;
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- return assignVirtToPhysReg(LRE, Hint);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
}
}
@@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
- return assignVirtToPhysReg(LRE, PhysReg);
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
}
DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
@@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
- if (Cost == 0)
- return assignVirtToPhysReg(LRE, *I);
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
if (Cost < BestCost)
BestReg = *I, BestCost = Cost;
}
if (BestReg) {
definePhysReg(MI, BestReg, regFree);
- return assignVirtToPhysReg(LRE, BestReg);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
}
// Nothing we can do. Report an error and keep going with a bad allocation.
MI->emitError("ran out of registers during register allocation");
definePhysReg(MI, *AO.begin(), regFree);
- assignVirtToPhysReg(LRE, *AO.begin());
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (New) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
@@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- allocVirtReg(MI, *LRI, Hint);
- } else if (LR.LastUse) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
- if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
- addKillFlag(LR);
+ if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- LR.Dirty = true;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI->getOperand(OpNum);
if (New) {
- allocVirtReg(MI, *LRI, Hint);
+ LRI = allocVirtReg(MI, LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
- << PrintReg(LR.PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
++NumLoads;
- } else if (LR.Dirty) {
+ } else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
DEBUG(dbgs() << "Killing last use: " << MO << "\n");
if (MO.isUse())
@@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
MO.setIsDead(false);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(Reg);
if (ThroughRegs.count(PhysRegState[Reg]))
definePhysReg(MI, Reg, regFree);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
UsedInInstr.set(*AS);
if (ThroughRegs.count(PhysRegState[*AS]))
definePhysReg(MI, *AS, regFree);
@@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
SmallVector<unsigned, 8> PartialDefs;
- DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
<< DefIdx << ".\n");
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
setPhysReg(MI, i, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
@@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- PartialDefs.push_back(LRI->second.PhysReg);
- } else if (MO.isEarlyClobber()) {
- // Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
- if (setPhysReg(MI, i, PhysReg))
- VirtDead.push_back(Reg);
+ PartialDefs.push_back(LRI->PhysReg);
}
}
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
UsedInInstr.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(PartialDefs[i]);
}
-void RAFast::AllocateBasicBlock() {
- DEBUG(dbgs() << "\nAllocating " << *MBB);
+/// addRetOperands - Ensure that a return instruction has an operand for each
+/// value live out of the function.
+///
+/// Things marked both call and return are tail calls; do not do this for them.
+/// The tail callee need not take the same registers as input that it produces
+/// as output, and there are dependencies for its input registers elsewhere.
+///
+/// FIXME: This should be done as part of instruction selection, and this helper
+/// should be deleted. Until then, we use custom logic here to create the proper
+/// operand under all circumstances. We can't use addRegisterKilled because that
+/// doesn't make sense for undefined values. We can't simply avoid calling it
+/// for undefined values, because we must ensure that the operand always exists.
+void RAFast::addRetOperands(MachineBasicBlock *MBB) {
+ if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall())
+ return;
+
+ MachineInstr *MI = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MBB->getParent()->getRegInfo().liveout_begin(),
+ E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Cannot have a live-out virtual register.");
+
+ bool hasDef = PhysRegState[Reg] == regReserved;
+
+ // Check if this register already has an operand.
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+
+ unsigned OperReg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(OperReg))
+ continue;
- // FIXME: This should probably be added by instruction selection instead?
- // If the last instruction in the block is a return, make sure to mark it as
- // using all of the live-out values in the function. Things marked both call
- // and return are tail calls; do not do this for them. The tail callee need
- // not take the same registers as input that it produces as output, and there
- // are dependencies for its input registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
- !MBB->back().getDesc().isCall()) {
- MachineInstr *Ret = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
- "Cannot have a live-out virtual register.");
-
- // Add live-out registers as implicit uses.
- Ret->addRegisterKilled(*I, TRI, true);
+ if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) {
+ // If the ret already has an operand for this physreg or a superset,
+ // don't duplicate it. Set the kill flag if the value is defined.
+ if (hasDef && !MO.isKill())
+ MO.setIsKill();
+ Found = true;
+ break;
+ }
}
+ if (!Found)
+ MI->addOperand(MachineOperand::CreateReg(Reg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ hasDef/*IsKill*/));
}
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
- assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?");
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB->begin();
@@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() {
case regReserved:
dbgs() << "*";
break;
- default:
+ default: {
dbgs() << '=' << PrintReg(PhysRegState[Reg]);
- if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
dbgs() << "*";
- assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
- "Bad inverse map");
+ assert(I->PhysReg == Reg && "Bad inverse map");
break;
}
+ }
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
- assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
"Bad map value");
- assert(PhysRegState[i->second.PhysReg] == i->first &&
- "Bad inverse map");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
});
@@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- LiveDbgValueMap[Reg].push_back(MI);
- LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->second.PhysReg);
+ setPhysReg(MI, i, LRI->PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
if (SS == -1) {
@@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() {
}
}
}
+ LiveDbgValueMap[Reg].push_back(MI);
}
}
// Next instruction.
@@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() {
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
killVirtReg(LRI);
@@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
UsedInInstr.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
UsedInInstr.set(*AS);
}
}
unsigned DefOpEnd = MI->getNumOperands();
- if (MCID.isCall()) {
+ if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
@@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() {
continue;
}
LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
if (setPhysReg(MI, i, PhysReg)) {
VirtDead.push_back(Reg);
CopyDst = 0; // cancel coalescing;
@@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() {
MBB->erase(Coalesced[i]);
NumCopies += Coalesced.size();
+ // addRetOperands must run after we've seen all defs in this block.
+ addRetOperands(MBB);
+
DEBUG(MBB->dump());
}
@@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.resize(TRI->getNumRegs());
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
@@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Make sure the set of used physregs is closed under subreg operations.
- MRI->closePhysRegsUsed(*TRI);
-
// Add the clobber lists for all the instructions we skipped earlier.
for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const unsigned *Defs = (*I)->getImplicitDefs())
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
SkippedInstrs.clear();
StackSlotForVirtReg.clear();
LiveDbgValueMap.clear();
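The RegAllocFast changes above swap the DenseMap-based LiveRegMap for a SparseSet keyed by TargetRegisterInfo::virtReg2Index(), which is why lookups now go through findLiveVirtReg() and why allocVirtReg() performs a fresh lookup of VirtReg after definePhysReg() may have modified LiveVirtRegs. The sketch below is a simplified, self-contained version of the underlying sparse-set idea (a packed dense vector plus a sparse index table); it is not LLVM's SparseSet interface and all names are invented:

// Minimal sparse set over unsigned keys in [0, universe). Insert, find and
// erase are O(1) with no hashing; erase moves the last dense entry into the
// hole, which is why iterators into the dense array can go stale.
#include <cstdio>
#include <vector>

struct ToySparseSet {
  std::vector<unsigned> sparse; // key -> candidate position in 'dense'
  std::vector<unsigned> dense;  // packed list of keys currently in the set

  explicit ToySparseSet(unsigned universe) : sparse(universe, 0) {}

  bool contains(unsigned key) const {
    unsigned pos = sparse[key];
    return pos < dense.size() && dense[pos] == key;
  }
  void insert(unsigned key) {
    if (contains(key))
      return;
    sparse[key] = dense.size();
    dense.push_back(key);
  }
  void erase(unsigned key) {
    if (!contains(key))
      return;
    unsigned pos = sparse[key];
    dense[pos] = dense.back(); // move the last element into the hole
    sparse[dense[pos]] = pos;
    dense.pop_back();
  }
};

int main() {
  ToySparseSet live(16); // universe fixed up front, like setUniverse()
  live.insert(3);
  live.insert(7);
  live.insert(12);
  live.erase(7);
  std::printf("3 live: %d, 7 live: %d\n", (int)live.contains(3), (int)live.contains(7));
  return 0;
}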
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index f54a2c85d100..3f2a617100c3 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,7 +16,6 @@
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
@@ -29,6 +28,7 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass,
}
};
+ // Register mask interference. The current VirtReg is checked for register
+ // mask interference on entry to selectOrSplit(). If there is no
+ // interference, UsableRegs is left empty. If there is interference,
+ // UsableRegs has a bit mask of registers that can be used without register
+ // mask interference.
+ BitVector UsableRegs;
+
+ /// clobberedByRegMask - Returns true if PhysReg is not directly usable
+ /// because of register mask clobbers.
+ bool clobberedByRegMask(unsigned PhysReg) const {
+ return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
+ }
+
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
@@ -248,7 +261,6 @@ public:
static char ID;
private:
- void LRE_WillEraseInstruction(MachineInstr*);
bool LRE_CanEraseVirtReg(unsigned);
void LRE_WillShrinkVirtReg(unsigned);
void LRE_DidCloneVirtReg(unsigned, unsigned);
@@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
-void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
- // LRE itself will remove from SlotIndexes and parent basic block.
- VRM->RemoveMachineInstrFromMaps(MI);
-}
-
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
unassign(LIS->getInterval(VirtReg), PhysReg);
@@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) {
Prio |= (1u << 30);
}
- Queue.push(std::make_pair(Prio, Reg));
+ Queue.push(std::make_pair(Prio, ~Reg));
}
LiveInterval *RAGreedy::dequeue() {
if (Queue.empty())
return 0;
- LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
Queue.pop();
return LI;
}
@@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
- while ((PhysReg = Order.next()))
+ while ((PhysReg = Order.next())) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (!checkPhysRegInterference(VirtReg, PhysReg))
break;
+ }
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
@@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
- if (Order.isHint(Hint)) {
+ if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
EvictionCost MaxCost(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
Cascade = NextCascade;
EvictionCost Cost;
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
// If there is 10 or more interferences, chances are one is heavier.
if (Q.collectInterferingVRegs(10) >= 10)
@@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
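
The unsigned to uint16_t switch in these loops only narrows the element type of the target's overlap tables; the idiom stays a walk over a zero-terminated list. A minimal stand-alone imitation with a made-up table (not real target data):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical overlap list for one physical register, zero-terminated the
  // same way the TRI->getOverlaps() lists are consumed above.
  static const uint16_t Overlaps[] = {17, 42, 43, 0};

  for (const uint16_t *AliasI = Overlaps; *AliasI; ++AliasI)
    std::printf("query interference against reg %u\n", unsigned(*AliasI));
  return 0;
}
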
@@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
}
if (GlobalCand.size() <= NumCands)
@@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
// Prepare split editor.
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
SmallVectorImpl<float> &GapWeight) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
const unsigned NumGaps = Uses.size()-1;
// Start and end points for the interference check.
@@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
.checkInterference())
continue;
@@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
- LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// that the interval is continuous from FirstInstr to LastInstr. We should
// make sure that we don't do anything illegal to such an interval, though.
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
if (Uses.size() <= 2)
return 0;
const unsigned NumGaps = Uses.size()-1;
@@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG({
dbgs() << "tryLocalSplit: ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << ' ' << Uses[i];
dbgs() << '\n';
});
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (!UsableRegs.empty()) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
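
The added block is a two-pointer walk over two sorted sequences: use slots and the block's regmask slots. The stand-alone sketch below reproduces the basic pairing with plain ints standing in for SlotIndex and fabricated values, leaving out the same-instruction special case handled above:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Sorted stand-ins for SA->getUseSlots() and the block's regmask slots.
  std::vector<int> Uses = {10, 20, 30, 40};
  std::vector<int> RMS  = {4, 12, 25, 26, 37};
  std::vector<unsigned> RegMaskGaps;

  const unsigned NumGaps = Uses.size() - 1;
  // Constrain to the live range: first regmask at or after the first use.
  unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), Uses.front()) -
                RMS.begin();
  unsigned re = RMS.size();
  for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
    if (Uses[i + 1] < RMS[ri])
      continue;                       // no regmask in this gap
    RegMaskGaps.push_back(i);         // gap i contains a regmask
    while (ri != re && RMS[ri] < Uses[i + 1])
      ++ri;                           // a regmask on a use counts in both gaps
  }

  for (unsigned g : RegMaskGaps)
    std::printf("gap %u (%d-%d) crosses a regmask\n", g, Uses[g], Uses[g + 1]);
  return 0;
}
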
// Since we allow local split results to be split again, there is a risk of
// creating infinite loops. It is tempting to require that the new live
// ranges have less instructions than the original. That would guarantee
@@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
calcGapWeights(PhysReg, GapWeight);
+ // Remove any gaps with regmask clobbers.
+ if (clobberedByRegMask(PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Check if VirtReg is live across any calls.
+ UsableRegs.clear();
+ if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
+ DEBUG(dbgs() << "Live across regmasks.\n");
+
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
- IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+ IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
@@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DebugVars->emitDebugValues(VRM);
}
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
deleted file mode 100644
index ce3fb90b1126..000000000000
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ /dev/null
@@ -1,1543 +0,0 @@
-//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a linear scan register allocator.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regalloc"
-#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
-#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
-#include "RegisterClassInfo.h"
-#include "Spiller.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <queue>
-#include <memory>
-#include <cmath>
-
-using namespace llvm;
-
-STATISTIC(NumIters , "Number of iterations performed");
-STATISTIC(NumBacktracks, "Number of times we had to backtrack");
-STATISTIC(NumCoalesce, "Number of copies coalesced");
-STATISTIC(NumDowngrade, "Number of registers downgraded");
-
-static cl::opt<bool>
-NewHeuristic("new-spilling-heuristic",
- cl::desc("Use new spilling heuristic"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-TrivCoalesceEnds("trivial-coalesce-ends",
- cl::desc("Attempt trivial coalescing of interval ends"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-AvoidWAWHazard("avoid-waw-hazard",
- cl::desc("Avoid write-write hazards for some register classes"),
- cl::init(false), cl::Hidden);
-
-static RegisterRegAlloc
-linearscanRegAlloc("linearscan", "linear scan register allocator",
- createLinearScanRegisterAllocator);
-
-namespace {
- // When we allocate a register, add it to a fixed-size queue of
- // registers to skip in subsequent allocations. This trades a small
- // amount of register pressure and increased spills for flexibility in
- // the post-pass scheduler.
- //
- // Note that the number of registers used for reloading spills
- // will be one greater than the value of this option.
- //
- // One big limitation of this is that it doesn't differentiate between
- // different register classes. So on x86-64, if there is xmm register
- // pressure, it can cause fewer GPRs to be held in the queue.
- static cl::opt<unsigned>
- NumRecentlyUsedRegs("linearscan-skip-count",
- cl::desc("Number of registers for linearscan to remember"
- "to skip."),
- cl::init(0),
- cl::Hidden);
-
- struct RALinScan : public MachineFunctionPass {
- static char ID;
- RALinScan() : MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
-
- // Initialize the queue to record recently-used registers.
- if (NumRecentlyUsedRegs > 0)
- RecentRegs.resize(NumRecentlyUsedRegs, 0);
- RecentNext = RecentRegs.begin();
- avoidWAW_ = 0;
- }
-
- typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
- typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
- private:
- /// RelatedRegClasses - This structure is built the first time a function is
- /// compiled, and keeps track of which register classes have registers that
- /// belong to multiple classes or have aliases that are in other classes.
- EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
- DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
-
- // NextReloadMap - For each register in the map, it maps to another
- // register which is defined by a reload from the same stack slot and
- // both reloads are in the same basic block.
- DenseMap<unsigned, unsigned> NextReloadMap;
-
- // DowngradedRegs - A set of registers which are being "downgraded", i.e.
- // un-favored for allocation.
- SmallSet<unsigned, 8> DowngradedRegs;
-
- // DowngradeMap - A map from virtual registers to physical registers being
- // downgraded for the virtual registers.
- DenseMap<unsigned, unsigned> DowngradeMap;
-
- MachineFunction* mf_;
- MachineRegisterInfo* mri_;
- const TargetMachine* tm_;
- const TargetRegisterInfo* tri_;
- const TargetInstrInfo* tii_;
- BitVector allocatableRegs_;
- BitVector reservedRegs_;
- LiveIntervals* li_;
- MachineLoopInfo *loopInfo;
- RegisterClassInfo RegClassInfo;
-
- /// handled_ - Intervals are added to the handled_ set in the order of their
- /// start value. This is used for backtracking.
- std::vector<LiveInterval*> handled_;
-
- /// fixed_ - Intervals that correspond to machine registers.
- ///
- IntervalPtrs fixed_;
-
- /// active_ - Intervals that are currently being processed, and which have a
- /// live range active for the current point.
- IntervalPtrs active_;
-
- /// inactive_ - Intervals that are currently being processed, but which have
- /// a hold at the current point.
- IntervalPtrs inactive_;
-
- typedef std::priority_queue<LiveInterval*,
- SmallVector<LiveInterval*, 64>,
- greater_ptr<LiveInterval> > IntervalHeap;
- IntervalHeap unhandled_;
-
- /// regUse_ - Tracks register usage.
- SmallVector<unsigned, 32> regUse_;
- SmallVector<unsigned, 32> regUseBackUp_;
-
- /// vrm_ - Tracks register assignments.
- VirtRegMap* vrm_;
-
- std::auto_ptr<VirtRegRewriter> rewriter_;
-
- std::auto_ptr<Spiller> spiller_;
-
- // The queue of recently-used registers.
- SmallVector<unsigned, 4> RecentRegs;
- SmallVector<unsigned, 4>::iterator RecentNext;
-
- // Last write-after-write register written.
- unsigned avoidWAW_;
-
- // Record that we just picked this register.
- void recordRecentlyUsed(unsigned reg) {
- assert(reg != 0 && "Recently used register is NOREG!");
- if (!RecentRegs.empty()) {
- *RecentNext++ = reg;
- if (RecentNext == RecentRegs.end())
- RecentNext = RecentRegs.begin();
- }
- }
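
RecentRegs and RecentNext above form a small fixed-size ring buffer of recently allocated registers. A self-contained imitation of how the write pointer wraps, using std::vector and hypothetical values rather than the pass's SmallVector:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Fixed-size ring of recently allocated registers, mirroring RecentRegs
  // and RecentNext in the deleted pass (size and values are made up).
  std::vector<unsigned> RecentRegs(3, 0);
  std::vector<unsigned>::iterator RecentNext = RecentRegs.begin();

  auto recordRecentlyUsed = [&](unsigned reg) {
    *RecentNext++ = reg;
    if (RecentNext == RecentRegs.end())
      RecentNext = RecentRegs.begin();            // wrap around
  };
  auto isRecentlyUsed = [&](unsigned reg) {
    return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
           RecentRegs.end();
  };

  const unsigned regs[] = {5, 6, 7, 8};           // 8 overwrites 5 on wrap
  for (unsigned reg : regs)
    recordRecentlyUsed(reg);
  std::printf("5 recent? %d   8 recent? %d\n", isRecentlyUsed(5),
              isRecentlyUsed(8));
  return 0;
}
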
-
- public:
- virtual const char* getPassName() const {
- return "Linear Scan Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- // Make sure PassManager knows which analyses to make available
- // to coalescing and which analyses coalescing invalidates.
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
- AU.addRequired<CalculateSpillWeights>();
- AU.addRequiredID(LiveStacksID);
- AU.addPreservedID(LiveStacksID);
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
- AU.addRequiredID(MachineDominatorsID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- /// runOnMachineFunction - register allocate the whole function
- bool runOnMachineFunction(MachineFunction&);
-
- // Determine if we skip this register due to its being recently used.
- bool isRecentlyUsed(unsigned reg) const {
- return reg == avoidWAW_ ||
- std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
- }
-
- private:
- /// linearScan - the linear scan algorithm
- void linearScan();
-
- /// initIntervalSets - initialize the interval sets.
- ///
- void initIntervalSets();
-
- /// processActiveIntervals - expire old intervals and move non-overlapping
- /// ones to the inactive list.
- void processActiveIntervals(SlotIndex CurPoint);
-
- /// processInactiveIntervals - expire old intervals and move overlapping
- /// ones to the active list.
- void processInactiveIntervals(SlotIndex CurPoint);
-
- /// hasNextReloadInterval - Return the next liveinterval that's being
- /// defined by a reload from the same SS as the specified one.
- LiveInterval *hasNextReloadInterval(LiveInterval *cur);
-
- /// DowngradeRegister - Downgrade a register for allocation.
- void DowngradeRegister(LiveInterval *li, unsigned Reg);
-
- /// UpgradeRegister - Upgrade a register for allocation.
- void UpgradeRegister(unsigned Reg);
-
- /// assignRegOrStackSlotAtInterval - assign a register if one
- /// is available, or spill.
- void assignRegOrStackSlotAtInterval(LiveInterval* cur);
-
- void updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC);
-
- /// findIntervalsToSpill - Determine the intervals to spill for the
- /// specified interval. It's passed the physical registers whose spill
- /// weight is the lowest among all the registers whose live intervals
- /// conflict with the interval.
- void findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals);
-
- /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
- /// try to allocate the definition to the same register as the source,
- /// if the register is not defined during the life time of the interval.
- /// This eliminates a copy, and is used to coalesce copies which were not
- /// coalesced away before allocation either due to dest and src being in
- /// different register classes or because the coalescer was overly
- /// conservative.
- unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
-
- ///
- /// Register usage / availability tracking helpers.
- ///
-
- void initRegUses() {
- regUse_.resize(tri_->getNumRegs(), 0);
- regUseBackUp_.resize(tri_->getNumRegs(), 0);
- }
-
- void finalizeRegUses() {
-#ifndef NDEBUG
- // Verify all the registers are "freed".
- bool Error = false;
- for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
- if (regUse_[i] != 0) {
- dbgs() << tri_->getName(i) << " is still in use!\n";
- Error = true;
- }
- }
- if (Error)
- llvm_unreachable(0);
-#endif
- regUse_.clear();
- regUseBackUp_.clear();
- }
-
- void addRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- ++regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
- ++regUse_[*as];
- }
-
- void delRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- assert(regUse_[physReg] != 0);
- --regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
- assert(regUse_[*as] != 0);
- --regUse_[*as];
- }
- }
-
- bool isRegAvail(unsigned physReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- return regUse_[physReg] == 0;
- }
-
- void backUpRegUses() {
- regUseBackUp_ = regUse_;
- }
-
- void restoreRegUses() {
- regUse_ = regUseBackUp_;
- }
-
- ///
- /// Register handling helpers.
- ///
-
- /// getFreePhysReg - return a free physical register for this virtual
- /// register interval if we have one, otherwise return 0.
- unsigned getFreePhysReg(LiveInterval* cur);
- unsigned getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs);
-
- /// getFirstNonReservedPhysReg - return the first non-reserved physical
- /// register in the register class.
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
- ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
- assert(!O.empty() && "All registers reserved?!");
- return O.front();
- }
-
- void ComputeRelatedRegClasses();
-
- template <typename ItTy>
- void printIntervals(const char* const str, ItTy i, ItTy e) const {
- DEBUG({
- if (str)
- dbgs() << str << " intervals:\n";
-
- for (; i != e; ++i) {
- dbgs() << '\t' << *i->first << " -> ";
-
- unsigned reg = i->first->reg;
- if (TargetRegisterInfo::isVirtualRegister(reg))
- reg = vrm_->getPhys(reg);
-
- dbgs() << tri_->getName(reg) << '\n';
- }
- });
- }
- };
- char RALinScan::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-
-void RALinScan::ComputeRelatedRegClasses() {
- // First pass, add all reg classes to the union, and determine at least one
- // reg class that each register is in.
- bool HasAliases = false;
- for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
- E = tri_->regclass_end(); RCI != E; ++RCI) {
- RelatedRegClasses.insert(*RCI);
- for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
- I != E; ++I) {
- HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
- const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
- if (PRC) {
- // Already processed this register. Just make sure we know that
- // multiple register classes share a register.
- RelatedRegClasses.unionSets(PRC, *RCI);
- } else {
- PRC = *RCI;
- }
- }
- }
-
- // Second pass, now that we know conservatively what register classes each reg
- // belongs to, add info about aliases. We don't need to do this for targets
- // without register aliases.
- if (HasAliases)
- for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
- I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
- I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
- const TargetRegisterClass *AliasClass =
- OneClassForEachPhysReg.lookup(*AS);
- if (AliasClass)
- RelatedRegClasses.unionSets(I->second, AliasClass);
- }
-}
-
-/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
-/// to allocate the definition to the same register as the source register if
-/// that register is not defined during the lifetime of the interval. If the interval is
-/// killed by a copy, try to use the destination register. This eliminates a
-/// copy. This is used to coalesce copies which were not coalesced away before
-/// allocation either due to dest and src being in different register classes or
-/// because the coalescer was overly conservative.
-unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
- unsigned Preference = vrm_->getRegAllocPref(cur.reg);
- if ((Preference && Preference == Reg) || !cur.containsOneValue())
- return Reg;
-
- // We cannot handle complicated live ranges. Simple linear stuff only.
- if (cur.ranges.size() != 1)
- return Reg;
-
- const LiveRange &range = cur.ranges.front();
-
- VNInfo *vni = range.valno;
- if (vni->isUnused() || !vni->def.isValid())
- return Reg;
-
- unsigned CandReg;
- {
- MachineInstr *CopyMI;
- if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
- // Defined by a copy, try to extend SrcReg forward
- CandReg = CopyMI->getOperand(1).getReg();
- else if (TrivCoalesceEnds &&
- (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
- CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
- // Only used by a copy, try to extend DstReg backwards
- CandReg = CopyMI->getOperand(0).getReg();
- else
- return Reg;
-
- // If the target of the copy is a sub-register then don't coalesce.
- if(CopyMI->getOperand(0).getSubReg())
- return Reg;
- }
-
- if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
- if (!vrm_->isAssignedReg(CandReg))
- return Reg;
- CandReg = vrm_->getPhys(CandReg);
- }
- if (Reg == CandReg)
- return Reg;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
- if (!RC->contains(CandReg))
- return Reg;
-
- if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
- return Reg;
-
- // Try to coalesce.
- DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
- << '\n');
- vrm_->clearVirt(cur.reg);
- vrm_->assignVirt2Phys(cur.reg, CandReg);
-
- ++NumCoalesce;
- return CandReg;
-}
-
-bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &fn.getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- allocatableRegs_ = tri_->getAllocatableSet(fn);
- reservedRegs_ = tri_->getReservedRegs(fn);
- li_ = &getAnalysis<LiveIntervals>();
- loopInfo = &getAnalysis<MachineLoopInfo>();
- RegClassInfo.runOnMachineFunction(fn);
-
- // We don't run the coalescer here because we have no reason to
- // interact with it. If the coalescer requires interaction, it
- // won't do anything. If it doesn't require interaction, we assume
- // it was run as a separate pass.
-
- // If this is the first function compiled, compute the related reg classes.
- if (RelatedRegClasses.empty())
- ComputeRelatedRegClasses();
-
- // Also resize register usage trackers.
- initRegUses();
-
- vrm_ = &getAnalysis<VirtRegMap>();
- if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
- spiller_.reset(createSpiller(*this, *mf_, *vrm_));
-
- initIntervalSets();
-
- linearScan();
-
- // Rewrite spill code and update the PhysRegsUsed set.
- rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
-
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
-
- assert(unhandled_.empty() && "Unhandled live intervals remain!");
-
- finalizeRegUses();
-
- fixed_.clear();
- active_.clear();
- inactive_.clear();
- handled_.clear();
- NextReloadMap.clear();
- DowngradedRegs.clear();
- DowngradeMap.clear();
- spiller_.reset(0);
-
- return true;
-}
-
-/// initIntervalSets - initialize the interval sets.
-///
-void RALinScan::initIntervalSets()
-{
- assert(unhandled_.empty() && fixed_.empty() &&
- active_.empty() && inactive_.empty() &&
- "interval sets should be empty on initialization");
-
- handled_.reserve(li_->getNumIntervals());
-
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
- if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
- mri_->setPhysRegUsed(i->second->reg);
- fixed_.push_back(std::make_pair(i->second, i->second->begin()));
- }
- } else {
- if (i->second->empty()) {
- assignRegOrStackSlotAtInterval(i->second);
- }
- else
- unhandled_.push(i->second);
- }
- }
-}
-
-void RALinScan::linearScan() {
- // linear scan algorithm
- DEBUG({
- dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
- << mf_->getFunction()->getName() << '\n';
- printIntervals("fixed", fixed_.begin(), fixed_.end());
- });
-
- while (!unhandled_.empty()) {
- // pick the interval with the earliest start point
- LiveInterval* cur = unhandled_.top();
- unhandled_.pop();
- ++NumIters;
- DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
-
- assert(!cur->empty() && "Empty interval in unhandled set.");
-
- processActiveIntervals(cur->beginIndex());
- processInactiveIntervals(cur->beginIndex());
-
- assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
- "Can only allocate virtual registers!");
-
- // Allocating a virtual register. Try to find a free
- // physical register or spill an interval (possibly this one) in order to
- // assign it one.
- assignRegOrStackSlotAtInterval(cur);
-
- DEBUG({
- printIntervals("active", active_.begin(), active_.end());
- printIntervals("inactive", inactive_.begin(), inactive_.end());
- });
- }
-
- // Expire any remaining active intervals
- while (!active_.empty()) {
- IntervalPtr &IP = active_.back();
- unsigned reg = IP.first->reg;
- DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- active_.pop_back();
- }
-
- // Expire any remaining inactive intervals
- DEBUG({
- for (IntervalPtrs::reverse_iterator
- i = inactive_.rbegin(); i != inactive_.rend(); ++i)
- dbgs() << "\tinterval " << *i->first << " expired\n";
- });
- inactive_.clear();
-
- // Add live-ins to every BB except for entry. Also perform trivial coalescing.
- MachineFunction::iterator EntryMBB = mf_->begin();
- SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- LiveInterval &cur = *i->second;
- unsigned Reg = 0;
- bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
- if (isPhys)
- Reg = cur.reg;
- else if (vrm_->isAssignedReg(cur.reg))
- Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
- if (!Reg)
- continue;
- // Ignore split live intervals.
- if (!isPhys && vrm_->getPreSplitReg(cur.reg))
- continue;
-
- for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
- I != E; ++I) {
- const LiveRange &LR = *I;
- if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
- for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
- if (LiveInMBBs[i] != EntryMBB) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Adding a virtual register to livein set?");
- LiveInMBBs[i]->addLiveIn(Reg);
- }
- LiveInMBBs.clear();
- }
- }
- }
-
- DEBUG(dbgs() << *vrm_);
-
- // Look for physical registers that end up not being allocated even though
- // register allocator had to spill other registers in its register class.
- if (!vrm_->FindUnusedRegisters(li_))
- return;
-}
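
For readers skimming this removal, the deleted pass implemented classic linear scan: intervals sorted by start point, an active set expired as the scan point advances, and a spill when no register is free. The toy sketch below shows only that core loop with plain ints and a naive spill-the-current-interval choice; the real assignRegOrStackSlotAtInterval below picks spill candidates by weight and can backtrack:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Interval { int Start, End, Reg; };  // Reg == -1 means spilled

int main() {
  std::vector<Interval> Ivs = {{0, 10, -1}, {2, 4, -1}, {3, 12, -1}, {5, 7, -1}};
  std::sort(Ivs.begin(), Ivs.end(),
            [](const Interval &A, const Interval &B) { return A.Start < B.Start; });

  std::vector<Interval *> Active;
  std::vector<int> Free = {0, 1};                 // two toy physical registers

  for (Interval &Cur : Ivs) {
    // Expire active intervals that end at or before the current start point.
    for (unsigned i = 0; i != Active.size();) {
      if (Active[i]->End <= Cur.Start) {
        Free.push_back(Active[i]->Reg);
        Active[i] = Active.back();
        Active.pop_back();
      } else {
        ++i;
      }
    }
    if (!Free.empty()) {                          // assign a free register
      Cur.Reg = Free.back();
      Free.pop_back();
      Active.push_back(&Cur);
    }                                             // else: Cur stays spilled
  }

  for (const Interval &I : Ivs) {
    if (I.Reg < 0)
      std::printf("[%d,%d) -> spilled\n", I.Start, I.End);
    else
      std::printf("[%d,%d) -> r%d\n", I.Start, I.End, I.Reg);
  }
  return 0;
}
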
-
-/// processActiveIntervals - expire old intervals and move non-overlapping ones
-/// to the inactive list.
-void RALinScan::processActiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing active intervals:\n");
-
- for (unsigned i = 0, e = active_.size(); i != e; ++i) {
- LiveInterval *Interval = active_[i].first;
- LiveInterval::iterator IntervalPos = active_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // Remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
-
- } else if (IntervalPos->start > CurPoint) {
- // Move inactive intervals to inactive list.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- // add to inactive.
- inactive_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- active_[i].second = IntervalPos;
- }
- }
-}
-
-/// processInactiveIntervals - expire old intervals and move overlapping
-/// ones to the active list.
-void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
-
- for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
- LiveInterval *Interval = inactive_[i].first;
- LiveInterval::iterator IntervalPos = inactive_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else if (IntervalPos->start <= CurPoint) {
- // move re-activated intervals in active list
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- addRegUse(reg);
- // add to active
- active_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- inactive_[i].second = IntervalPos;
- }
- }
-}
-
-/// updateSpillWeights - updates the spill weights of the specified physical
-/// register and its aliases.
-void RALinScan::updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC) {
- SmallSet<unsigned, 4> Processed;
- SmallSet<unsigned, 4> SuperAdded;
- SmallVector<unsigned, 4> Supers;
- Weights[reg] += weight;
- Processed.insert(reg);
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
- Weights[*as] += weight;
- Processed.insert(*as);
- if (tri_->isSubRegister(*as, reg) &&
- SuperAdded.insert(*as) &&
- RC->contains(*as)) {
- Supers.push_back(*as);
- }
- }
-
- // If the alias is a super-register, and the super-register is in the
- // register class we are trying to allocate. Then add the weight to all
- // sub-registers of the super-register even if they are not aliases.
- // e.g. allocating for GR32, bh is not used, updating bl spill weight.
- // bl should get the same spill weight otherwise it will be chosen
- // as a spill candidate since spilling bh doesn't make ebx available.
- for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
- for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
- if (!Processed.count(*sr))
- Weights[*sr] += weight;
- }
-}
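
The bh/bl/ebx comment above is easier to follow with concrete numbers. A rough stand-alone rendering of the propagation using toy alias and sub-register tables (nothing here is target-accurate):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Toy x86-ish tables: spilling BH must also make BL look expensive,
  // because freeing BH alone does not make EBX available.
  std::map<std::string, std::vector<std::string>> Aliases = {
      {"BH", {"BX", "EBX"}}};
  std::map<std::string, std::vector<std::string>> SubRegs = {
      {"BX", {"BH", "BL"}}, {"EBX", {"BX", "BH", "BL"}}};

  std::map<std::string, float> Weights;
  std::set<std::string> Processed = {"BH"};
  float W = 2.5f;

  Weights["BH"] += W;
  for (const std::string &A : Aliases["BH"]) {   // aliases get the weight too
    Weights[A] += W;
    Processed.insert(A);
    for (const std::string &S : SubRegs[A])      // super-regs push it down to
      if (Processed.insert(S).second)            // sub-regs not seen yet
        Weights[S] += W;
  }

  for (const auto &KV : Weights)
    std::printf("%s gets +%.1f\n", KV.first.c_str(), KV.second);
  return 0;
}
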
-
-static
-RALinScan::IntervalPtrs::iterator
-FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
- for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
- I != E; ++I)
- if (I->first == LI) return I;
- return IP.end();
-}
-
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
- SlotIndex Point){
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- RALinScan::IntervalPtr &IP = V[i];
- LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
- IP.second, Point);
- if (I != IP.first->begin()) --I;
- IP.second = I;
- }
-}
-
-/// getConflictWeight - Return the number of conflicts between the cur
-/// live interval and the defs and uses of Reg, weighted by loop depth.
-static
-float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
- MachineRegisterInfo *mri_,
- MachineLoopInfo *loopInfo) {
- float Conflicts = 0;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- if (cur->liveAt(li_->getInstructionIndex(MI))) {
- unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
- Conflicts += std::pow(10.0f, (float)loopDepth);
- }
- }
- return Conflicts;
-}
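
The pow(10, loopDepth) weighting means a single conflicting use in a doubly nested loop outweighs dozens of uses in straight-line code. A short check of the arithmetic with plain cmath:

#include <cmath>
#include <cstdio>

int main() {
  float Conflicts = 0;
  const unsigned depths[] = {0, 1, 2};      // three conflicting uses
  for (unsigned loopDepth : depths)
    Conflicts += std::pow(10.0f, (float)loopDepth);
  std::printf("total conflict weight: %.0f\n", Conflicts);  // 1 + 10 + 100 = 111
  return 0;
}
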
-
-/// findIntervalsToSpill - Determine the intervals to spill for the
-/// specified interval. It's passed the physical registers whose spill
-/// weight is the lowest among all the registers whose live intervals
-/// conflict with the interval.
-void RALinScan::findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals) {
- // We have figured out the *best* register to spill. But there are other
- // registers that are pretty good as well (spill weight within 3%). Spill
- // the one that has fewest defs and uses that conflict with cur.
- float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
- SmallVector<LiveInterval*, 8> SLIs[3];
-
- DEBUG({
- dbgs() << "\tConsidering " << NumCands << " candidates: ";
- for (unsigned i = 0; i != NumCands; ++i)
- dbgs() << tri_->getName(Candidates[i].first) << " ";
- dbgs() << "\n";
- });
-
- // Calculate the number of conflicts of each candidate.
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second-1))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- // Which is the best candidate?
- unsigned BestCandidate = 0;
- float MinConflicts = Conflicts[0];
- for (unsigned i = 1; i != NumCands; ++i) {
- if (Conflicts[i] < MinConflicts) {
- BestCandidate = i;
- MinConflicts = Conflicts[i];
- }
- }
-
- std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
- std::back_inserter(SpillIntervals));
-}
-
-namespace {
- struct WeightCompare {
- private:
- const RALinScan &Allocator;
-
- public:
- WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
-
- typedef std::pair<unsigned, float> RegWeightPair;
- bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
- return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
- }
- };
-}
-
-static bool weightsAreClose(float w1, float w2) {
- if (!NewHeuristic)
- return false;
-
- float diff = w1 - w2;
- if (diff <= 0.02f) // Within 0.02f
- return true;
- return (diff / w2) <= 0.05f; // Within 5%.
-}
-
-LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
- DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
- if (I == NextReloadMap.end())
- return 0;
- return &li_->getInterval(I->second);
-}
-
-void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
- bool isNew = DowngradedRegs.insert(*AS);
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, *AS));
- }
- ++NumDowngrade;
-}
-
-void RALinScan::UpgradeRegister(unsigned Reg) {
- if (Reg) {
- DowngradedRegs.erase(Reg);
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
- DowngradedRegs.erase(*AS);
- }
-}
-
-namespace {
- struct LISorter {
- bool operator()(LiveInterval* A, LiveInterval* B) {
- return A->beginIndex() < B->beginIndex();
- }
- };
-}
-
-/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
-/// spill.
-void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- DEBUG(dbgs() << "\tallocating current interval from "
- << RC->getName() << ": ");
-
- // This is an implicitly defined live interval, just assign any register.
- if (cur->empty()) {
- unsigned physReg = vrm_->getRegAllocPref(cur->reg);
- if (!physReg)
- physReg = getFirstNonReservedPhysReg(RC);
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- // Note the register is not really in use.
- vrm_->assignVirt2Phys(cur->reg, physReg);
- return;
- }
-
- backUpRegUses();
-
- std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
- SlotIndex StartPosition = cur->beginIndex();
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
- // If start of this live interval is defined by a move instruction and its
- // source is assigned a physical register that is compatible with the target
- // register class, then we should try to assign it the same register.
- // This can happen when the move is from a larger register class to a smaller
- // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
- if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
- VNInfo *vni = cur->begin()->valno;
- if (!vni->isUnused() && vni->def.isValid()) {
- MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- if (CopyMI && CopyMI->isCopy()) {
- unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
- unsigned SrcReg = CopyMI->getOperand(1).getReg();
- unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
- unsigned Reg = 0;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- Reg = SrcReg;
- else if (vrm_->isAssignedReg(SrcReg))
- Reg = vrm_->getPhys(SrcReg);
- if (Reg) {
- if (SrcSubReg)
- Reg = tri_->getSubReg(Reg, SrcSubReg);
- if (DstSubReg)
- Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
- if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- mri_->setRegAllocationHint(cur->reg, 0, Reg);
- }
- }
- }
- }
-
- // For every interval in inactive we overlap with, mark the
- // register as not free and update spill weights.
- for (IntervalPtrs::const_iterator i = inactive_.begin(),
- e = inactive_.end(); i != e; ++i) {
- unsigned Reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Can only allocate virtual registers!");
- const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- cur->overlapsFrom(*i->first, i->second-1)) {
- Reg = vrm_->getPhys(Reg);
- addRegUse(Reg);
- SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
- }
- }
-
- // Speculatively check to see if we can get a register right now. If not,
- // we know we won't be able to by adding more constraints. If so, we can
- // check to see if it is valid. Doing an exhaustive search of the fixed_ list
- // is very bad (it contains all callee clobbered registers for any functions
- // with a call), so we want to avoid doing that if possible.
- unsigned physReg = getFreePhysReg(cur);
- unsigned BestPhysReg = physReg;
- if (physReg) {
- // We got a register. However, if it's in the fixed_ list, we might
- // conflict with it. Check to see if we conflict with it or any of its
- // aliases.
- SmallSet<unsigned, 8> RegAliases;
- for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
- RegAliases.insert(*AS);
-
- bool ConflictsWithFixed = false;
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
- // Okay, this reg is on the fixed list. Check to see if we actually
- // conflict.
- LiveInterval *I = IP.first;
- if (I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- ConflictsWithFixed = true;
- break;
- }
- }
- }
- }
-
- // Okay, the register picked by our speculative getFreePhysReg call turned
- // out to be in use. Actually add all of the conflicting fixed registers to
- // regUse_ so we can do an accurate query.
- if (ConflictsWithFixed) {
- // For every interval in fixed we overlap with, mark the register as not
- // free and update spill weights.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
-
- const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- unsigned reg = I->reg;
- addRegUse(reg);
- SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
- }
- }
- }
-
- // Using the newly updated regUse_ object, which includes conflicts in the
- // future, see if there are any registers available.
- physReg = getFreePhysReg(cur);
- }
- }
-
- // Restore the physical register tracker, removing information about the
- // future.
- restoreRegUses();
-
- // If we find a free register, we are done: assign this virtual to
- // the free physical register and add this interval to the active
- // list.
- if (physReg) {
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- assert(RC->contains(physReg) && "Invalid candidate");
- vrm_->assignVirt2Phys(cur->reg, physReg);
- addRegUse(physReg);
- active_.push_back(std::make_pair(cur, cur->begin()));
- handled_.push_back(cur);
-
- // Remember physReg for avoiding a write-after-write hazard in the next
- // instruction.
- if (AvoidWAWHazard &&
- tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
- avoidWAW_ = physReg;
-
- // "Upgrade" the physical register since it has been allocated.
- UpgradeRegister(physReg);
- if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
- // "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
- mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
- DowngradeRegister(cur, physReg);
- }
- return;
- }
- DEBUG(dbgs() << "no free registers\n");
-
- // Compile the spill weights into an array that is better for scanning.
- std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
- for (std::vector<std::pair<unsigned, float> >::iterator
- I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
- updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
- // for each interval in active, update spill weights.
- for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
- }
-
- DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
-
- // Find a register to spill.
- float minWeight = HUGE_VALF;
- unsigned minReg = 0;
-
- bool Found = false;
- std::vector<std::pair<unsigned,float> > RegsWeights;
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
- if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
- float regWeight = SpillWeights[reg];
- // Skip recently allocated registers and reserved registers.
- if (minWeight > regWeight && !isRecentlyUsed(reg))
- Found = true;
- RegsWeights.push_back(std::make_pair(reg, regWeight));
- }
-
- // If we didn't find a register that is spillable, try aliases?
- if (!Found) {
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
- // No need to worry if the alias register size < regsize of RC.
- // We are going to spill all registers that alias it anyway.
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
- RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
- }
- }
-
- // Sort all potential spill candidates by weight.
- std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
- minReg = RegsWeights[0].first;
- minWeight = RegsWeights[0].second;
- if (minWeight == HUGE_VALF) {
- // All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
- if (cur->weight == HUGE_VALF ||
- li_->getApproximateInstructionCount(*cur) == 0) {
- // Spill a physical register around defs and uses.
- if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
- // spillPhysRegAroundRegDefsUses may have invalidated iterator stored
- // in fixed_. Reset them.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
- if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
- IP.second = I->advanceTo(I->begin(), StartPosition);
- }
-
- DowngradedRegs.clear();
- assignRegOrStackSlotAtInterval(cur);
- } else {
- assert(false && "Ran out of registers during register allocation!");
- report_fatal_error("Ran out of registers during register allocation!");
- }
- return;
- }
- }
-
- // Find up to 3 registers to consider as spill candidates.
- unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
- while (LastCandidate > 1) {
- if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
- break;
- --LastCandidate;
- }
-
- DEBUG({
- dbgs() << "\t\tregister(s) with min weight(s): ";
-
- for (unsigned i = 0; i != LastCandidate; ++i)
- dbgs() << tri_->getName(RegsWeights[i].first)
- << " (" << RegsWeights[i].second << ")\n";
- });
-
- // If the current has the minimum weight, we need to spill it and
- // add any added intervals back to unhandled, and restart
- // linearscan.
- if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
- DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> added;
- LiveRangeEdit LRE(*cur, added);
- spiller_->spill(LRE);
-
- std::sort(added.begin(), added.end(), LISorter());
- if (added.empty())
- return; // Early exit if all spills were folded.
-
- // Merge added with unhandled. Note that we have already sorted
- // intervals returned by addIntervalsForSpills by their starting
- // point.
- // This also updates the NextReloadMap. That is, it adds a mapping from a
- // register defined by a reload from SS to the next reload from SS in the
- // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // Last reload of same SS is in the same MBB. We want to try to
- // allocate both reloads the same register and make sure the reg
- // isn't clobbered in between if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
- return;
- }
-
- ++NumBacktracks;
-
- // Push the current interval back to unhandled since we are going
- // to re-run at least this iteration. Since we didn't modify it, it
- // should go back right at the front of the list.
- unhandled_.push(cur);
-
- assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
- "did not choose a register to spill?");
-
- // We spill all intervals aliasing the register with
- // minimum weight, rollback to the interval with the earliest
- // start point and let the linear scan algorithm run again
- SmallVector<LiveInterval*, 8> spillIs;
-
- // Determine which intervals have to be spilled.
- findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
-
- // Set of spilled vregs (used later to rollback properly)
- SmallSet<unsigned, 8> spilled;
-
- // The earliest start of a Spilled interval indicates up to where
- // in handled we need to roll back
- assert(!spillIs.empty() && "No spill intervals?");
- SlotIndex earliestStart = spillIs[0]->beginIndex();
-
- // Spill live intervals of virtual regs mapped to the physical register we
- // want to clear (and its aliases). We only spill those that overlap with the
- // current interval as the rest do not affect its allocation. We also keep
- // track of the earliest start of all spilled live intervals since this will
- // mark our rollback point.
- SmallVector<LiveInterval*, 8> added;
- while (!spillIs.empty()) {
- LiveInterval *sli = spillIs.back();
- spillIs.pop_back();
- DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
- if (sli->beginIndex() < earliestStart)
- earliestStart = sli->beginIndex();
- LiveRangeEdit LRE(*sli, added, 0, &spillIs);
- spiller_->spill(LRE);
- spilled.insert(sli->reg);
- }
-
- // Include any added intervals in earliestStart.
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- SlotIndex SI = added[i]->beginIndex();
- if (SI < earliestStart)
- earliestStart = SI;
- }
-
- DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
-
- // Scan handled in reverse order up to the earliest start of a
- // spilled live interval and undo each one, restoring the state of
- // unhandled.
- while (!handled_.empty()) {
- LiveInterval* i = handled_.back();
- // If this interval starts before t we are done.
- if (!i->empty() && i->beginIndex() < earliestStart)
- break;
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
- handled_.pop_back();
-
- // When undoing a live interval allocation we must know if it is active or
- // inactive to properly update regUse_ and the VirtRegMap.
- IntervalPtrs::iterator it;
- if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
- active_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- delRegUse(vrm_->getPhys(i->reg));
- vrm_->clearVirt(i->reg);
- } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
- inactive_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- vrm_->clearVirt(i->reg);
- } else {
- assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
- "Can only allocate virtual registers!");
- vrm_->clearVirt(i->reg);
- unhandled_.push(i);
- }
-
- DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
- if (ii == DowngradeMap.end())
- // If the interval has a preference, it must be defined by a copy. Clear the
- // preference now since the source interval allocation may have been
- // undone as well.
- mri_->setRegAllocationHint(i->reg, 0, 0);
- else {
- UpgradeRegister(ii->second);
- }
- }
-
- // Rewind the iterators in the active, inactive, and fixed lists back to the
- // point we reverted to.
- RevertVectorIteratorsTo(active_, earliestStart);
- RevertVectorIteratorsTo(inactive_, earliestStart);
- RevertVectorIteratorsTo(fixed_, earliestStart);
-
- // Scan the rest and undo each interval that expired after t and
- // insert it in active (the next iteration of the algorithm will
- // put it in inactive if required)
- for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
- LiveInterval *HI = handled_[i];
- if (!HI->expiredAt(earliestStart) &&
- HI->expiredAt(cur->beginIndex())) {
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
- active_.push_back(std::make_pair(HI, HI->begin()));
- assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
- addRegUse(vrm_->getPhys(HI->reg));
- }
- }
-
- // Merge added with unhandled.
- // This also updates the NextReloadMap. That is, it adds a mapping from a
- // register defined by a reload from SS to the next reload from SS in the
- // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- std::sort(added.begin(), added.end(), LISorter());
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // Last reload of same SS is in the same MBB. We want to try to
- // allocate both reloads the same register and make sure the reg
- // isn't clobbered in between if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
-}
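
The removed backtracking step above works in three phases: find the earliest start among the intervals the spiller just created, pop everything off the handled list that begins at or after that point and push it back onto the unhandled queue, then rewind the active/inactive/fixed iterators. A simplified, standalone sketch of the pop-and-requeue phase (the container types here are illustrative stand-ins, not the allocator's own):

    #include <queue>
    #include <utility>
    #include <vector>

    struct Interval { int start; int end; };

    // Undo every allocation whose interval begins at or after earliestStart
    // and put it back on the unhandled queue to be allocated again.
    void rollBack(std::vector<Interval*> &handled,
                  std::priority_queue<std::pair<int, Interval*> > &unhandled,
                  int earliestStart) {
      while (!handled.empty()) {
        Interval *I = handled.back();
        if (I->start < earliestStart)
          break;                                      // older allocations stay as they are
        handled.pop_back();
        unhandled.push(std::make_pair(-I->start, I)); // earliest start comes out first
      }
    }
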
-
-unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs) {
- unsigned FreeReg = 0;
- unsigned FreeRegInactiveCount = 0;
-
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
- // Resolve second part of the hint (if possible) given the current allocation.
- unsigned physReg = Hint.second;
- if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
- physReg = vrm_->getPhys(physReg);
-
- ArrayRef<unsigned> Order;
- if (Hint.first)
- Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
- else
- Order = RegClassInfo.getOrder(RC);
-
- assert(!Order.empty() && "No allocatable register in this register class!");
-
- // Scan for the first available register.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- // Skip recently allocated registers.
- if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- if (FreeReg < inactiveCounts.size())
- FreeRegInactiveCount = inactiveCounts[FreeReg];
- else
- FreeRegInactiveCount = 0;
- break;
- }
- }
-
- // If there are no free regs, or if this reg has the max inactive count,
- // return this register.
- if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
- // Remember what register we picked so we can skip it next time.
- if (FreeReg != 0) recordRecentlyUsed(FreeReg);
- return FreeReg;
- }
-
- // Continue scanning the registers, looking for the one with the highest
- // inactive count. Alkis found that this reduced register pressure very
- // slightly on X86 (in rev 1.94 of this file), though this should probably be
- // reevaluated now.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg] &&
- (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- FreeRegInactiveCount = inactiveCounts[Reg];
- if (FreeRegInactiveCount == MaxInactiveCount)
- break; // We found the one with the max inactive count.
- }
- }
-
- // Remember what register we picked so we can skip it next time.
- recordRecentlyUsed(FreeReg);
-
- return FreeReg;
-}
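
The scan above is two passes over the allocation order: take the first register that is free and not reserved or downgraded, and only if that register does not already have the maximum inactive count, keep scanning for the candidate with the highest inactive count. Collapsed into a single pass over plain arrays, the selection looks roughly like this (the ordering, reserved set and counts are stand-ins for the allocator's state, and register 0 is the "no register" sentinel as in the code above):

    #include <vector>

    // Prefer the free, non-reserved register with the most inactive intervals
    // currently assigned to it; returns 0 if nothing is free.
    unsigned pickFreeReg(const std::vector<unsigned> &order,
                         const std::vector<bool> &isFree,
                         const std::vector<bool> &isReserved,
                         const std::vector<unsigned> &inactiveCount) {
      unsigned best = 0, bestCount = 0;
      for (unsigned i = 0; i != order.size(); ++i) {
        unsigned reg = order[i];
        if (isReserved[reg] || !isFree[reg])
          continue;
        unsigned cnt = reg < inactiveCount.size() ? inactiveCount[reg] : 0;
        if (best == 0 || cnt > bestCount) {
          best = reg;
          bestCount = cnt;
        }
      }
      return best;
    }
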
-
-/// getFreePhysReg - return a free physical register for this virtual register
-/// interval if we have one, otherwise return 0.
-unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
- SmallVector<unsigned, 256> inactiveCounts;
- unsigned MaxInactiveCount = 0;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
- for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
-
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
- reg = vrm_->getPhys(reg);
- if (inactiveCounts.size() <= reg)
- inactiveCounts.resize(reg+1);
- ++inactiveCounts[reg];
- MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
- }
- }
-
- // If copy coalescer has assigned a "preferred" register, check if it's
- // available first.
- unsigned Preference = vrm_->getRegAllocPref(cur->reg);
- if (Preference) {
- DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
- RC->contains(Preference))
- return Preference;
- }
-
- unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
- true);
- if (FreeReg)
- return FreeReg;
- return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
-}
-
-FunctionPass* llvm::createLinearScanRegisterAllocator() {
- return new RALinScan();
-}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 0d2cf2d6184c..a2846145bc7e 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -32,14 +32,17 @@
#define DEBUG_TYPE "regalloc"
#include "RenderMachineFunction.h"
-#include "Splitter.h"
+#include "Spiller.h"
#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
#include "RegisterCoalescer.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -54,6 +57,7 @@
#include <limits>
#include <memory>
#include <set>
+#include <sstream>
#include <vector>
using namespace llvm;
@@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
+#ifndef NDEBUG
static cl::opt<bool>
-pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-split before PBQP register allocation."),
- cl::init(false), cl::Hidden);
+pbqpDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
namespace {
@@ -88,11 +94,9 @@ public:
: MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
@@ -132,6 +136,7 @@ private:
MachineRegisterInfo *mri;
RenderMachineFunction *rmf;
+ std::auto_ptr<Spiller> spiller;
LiveIntervals *lis;
LiveStacks *lss;
VirtRegMap *vrm;
@@ -141,10 +146,6 @@ private:
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc();
- /// \brief Adds a stack interval if the given live interval has been
- /// spilled. Used to support stack slot coloring.
- void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
@@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
assert(nodeItr != vreg2Node.end() && "No node for vreg.");
return nodeItr->second;
-
+
}
const PBQPRAProblem::AllowedSet&
@@ -195,9 +196,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const RegSet &vregs) {
typedef std::vector<const LiveInterval*> LIVector;
-
+ ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots();
MachineRegisterInfo *mri = &mf->getRegInfo();
- const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
PBQP::Graph &g = p->getGraph();
@@ -214,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
BitVector reservedRegs = tri->getReservedRegs(*mf);
- // Iterate over vregs.
+ // Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
unsigned vreg = *vregItr;
@@ -224,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
VRAllowed vrAllowed;
- ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
if (!reservedRegs.test(preg)) {
@@ -232,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
}
- // Remove any physical registers which overlap.
+ RegSet overlappingPRegs;
+
+ // Record physical registers whose ranges overlap.
for (RegSet::const_iterator pregItr = pregs.begin(),
pregEnd = pregs.end();
pregItr != pregEnd; ++pregItr) {
@@ -243,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
continue;
}
- if (!vregLI->overlaps(*pregLI)) {
- continue;
+ if (vregLI->overlaps(*pregLI))
+ overlappingPRegs.insert(preg);
+ }
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps(tri->getNumRegs());
+ for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(),
+ rmEnd = regMaskSlots.end();
+ rmItr != rmEnd; ++rmItr) {
+ SlotIndex rmIdx = *rmItr;
+ if (vregLI->liveAt(rmIdx)) {
+ MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx);
+ const uint32_t* regMask = 0;
+ for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(),
+ mopEnd = rmMI->operands_end();
+ mopItr != mopEnd; ++mopItr) {
+ if (mopItr->isRegMask()) {
+ regMask = mopItr->getRegMask();
+ break;
+ }
+ }
+ assert(regMask != 0 && "Couldn't find register mask.");
+ regMaskOverlaps.setBitsNotInMask(regMask);
}
+ }
+
+ for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) {
+ if (regMaskOverlaps.test(preg))
+ overlappingPRegs.insert(preg);
+ }
+
+ for (RegSet::const_iterator pregItr = overlappingPRegs.begin(),
+ pregEnd = overlappingPRegs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
// Remove the register from the allowed set.
VRAllowed::iterator eraseItr =
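
The added code records, for every register-mask operand the virtual register is live across, which physical registers that mask clobbers, and then erases those registers (together with physregs whose intervals overlap) from the vreg's allowed set. In a register mask a set bit means "preserved" and a clear bit means "clobbered", which is why setBitsNotInMask is used above. A standalone sketch of the pruning with plain std containers in place of BitVector (all names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Remove from 'allowed' every register whose bit is clear in at least one
    // of the masks the virtual register is live across (clear bit == clobbered).
    void pruneAllowedByRegMasks(std::vector<unsigned> &allowed,
                                const std::vector<const uint32_t*> &liveMasks,
                                unsigned numRegs) {
      std::vector<bool> clobbered(numRegs, false);
      for (unsigned m = 0; m != liveMasks.size(); ++m)
        for (unsigned reg = 0; reg != numRegs; ++reg)
          if (!(liveMasks[m][reg / 32] & (1u << (reg % 32))))
            clobbered[reg] = true;
      allowed.erase(std::remove_if(allowed.begin(), allowed.end(),
                                   [&](unsigned r) { return clobbered[r]; }),
                    allowed.end());
    }
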
@@ -256,7 +291,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Also remove any aliases.
- const unsigned *aliasItr = tri->getAliasSet(preg);
+ const uint16_t *aliasItr = tri->getAliasSet(preg);
if (aliasItr != 0) {
for (; *aliasItr != 0; ++aliasItr) {
VRAllowed::iterator eraseItr =
@@ -270,7 +305,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Construct the node.
- PBQP::Graph::NodeItr node =
+ PBQP::Graph::NodeItr node =
g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
// Record the mapping and allowed set in the problem.
@@ -371,7 +406,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
const float copyFactor = 0.5; // Cost of copy relative to load. Current
// value plucked randomly out of the air.
-
+
PBQP::PBQPNum cBenefit =
copyFactor * LiveIntervals::getSpillWeight(false, true,
loopInfo->getLoopDepth(mbb));
@@ -382,7 +417,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
}
const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
- unsigned pregOpt = 0;
+ unsigned pregOpt = 0;
while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
++pregOpt;
}
@@ -407,7 +442,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
std::swap(allowed1, allowed2);
}
}
-
+
addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
cBenefit);
}
@@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
if (preg1 == preg2) {
costMat[i + 1][j + 1] += -benefit;
- }
+ }
}
}
}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
- au.addRequiredID(RegisterCoalescerPassID);
if (customPassID)
au.addRequiredID(*customPassID);
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
@@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() {
}
}
-void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
- MachineRegisterInfo* mri) {
- int stackSlot = vrm->getStackSlot(spilled->reg);
-
- if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
- return;
- }
-
- const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
- LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
-
- VNInfo *vni;
- if (stackInterval.getNumValNums() != 0) {
- vni = stackInterval.getValNumInfo(0);
- } else {
- vni = stackInterval.getNextValue(
- SlotIndex(), 0, lss->getVNInfoAllocator());
- }
-
- LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
- stackInterval.MergeRangesInAsValue(rhsInterval, vni);
-}
-
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
const PBQP::Solution &solution) {
// Set to true if we have any spills
@@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
unsigned alloc = solution.getSelection(node);
if (problem.isPRegOption(vreg, alloc)) {
- unsigned preg = problem.getPRegForOption(vreg, alloc);
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
assert(preg != 0 && "Invalid preg selected.");
- vrm->assignVirt2Phys(vreg, preg);
+ vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
- const LiveInterval* spillInterval = &lis->getInterval(vreg);
- double oldWeight = spillInterval->weight;
- rmf->rememberUseDefs(spillInterval);
- std::vector<LiveInterval*> newSpills =
- lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
- addStackInterval(spillInterval, mri);
- rmf->rememberSpills(spillInterval, newSpills);
-
- (void) oldWeight;
+ SmallVector<LiveInterval*, 8> newSpills;
+ LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ spiller->spill(LRE);
+
DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
- << oldWeight << ", New vregs: ");
+ << LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (std::vector<LiveInterval*>::const_iterator
- itr = newSpills.begin(), end = newSpills.end();
+ for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
assert(!(*itr)->empty() && "Empty spill range.");
DEBUG(dbgs() << (*itr)->reg << " ");
@@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
- anotherRoundNeeded |= !newSpills.empty();
+ anotherRoundNeeded |= !LRE.empty();
} else {
- assert(false && "Unknown allocation option.");
+ llvm_unreachable("Unknown allocation option.");
}
}
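
The spill path above now hands the interval to a spiller through a LiveRangeEdit, erases the spilled vreg from the allocation set, and queues every interval the spiller created for the next PBQP round. Reduced to its control flow with placeholder types (a real spiller also rewrites the code around the spill):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Interval { unsigned reg; bool empty() const { return false; } };

    struct Spiller {
      // Stub: stands in for the inline spiller, which rewrites the function
      // and reports the live intervals created for reloads and remats.
      std::vector<Interval*> spill(Interval &) { return std::vector<Interval*>(); }
    };

    // Spill one interval and requeue whatever the spiller produced; another
    // allocation round is needed whenever new intervals appeared.
    bool spillAndRequeue(Spiller &spiller, Interval &victim,
                         std::set<unsigned> &vregsToAlloc) {
      vregsToAlloc.erase(victim.reg);
      std::vector<Interval*> created = spiller.spill(victim);
      for (unsigned i = 0; i != created.size(); ++i) {
        assert(!created[i]->empty() && "Empty spill range.");
        vregsToAlloc.insert(created[i]->reg);
      }
      return !created.empty();
    }
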
@@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
tm = &mf->getTarget();
tri = tm->getRegisterInfo();
tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+ mri = &mf->getRegInfo();
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
@@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
+ spiller.reset(createInlineSpiller(*this, MF, *vrm));
+ mri->freezeReservedRegs(MF);
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
@@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
+ const Function* func = mf->getFunction();
+ std::string fqn =
+ func->getParent()->getModuleIdentifier() + "." +
+ func->getName().str();
+ (void)fqn;
+
// If there are non-empty intervals allocate them using pbqp.
if (!vregsToAlloc.empty()) {
@@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::auto_ptr<PBQPRAProblem> problem =
builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+ if (pbqpDumpGraphs) {
+ std::ostringstream rs;
+ rs << round;
+ std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+ std::string tmp;
+ raw_fd_ostream os(graphFileName.c_str(), tmp);
+ DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+ << graphFileName << "\"\n");
+ problem->getGraph().dump(os);
+ }
+#endif
+
PBQP::Solution solution =
PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
problem->getGraph());
@@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
// Run rewriter
- std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+ vrm->rewrite(lis->getSlotIndexes());
- rewriter->runOnMachineFunction(*mf, *vrm, lis);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ vrm->clearAllVirt();
+ mri->clearVirtRegs();
return true;
}
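
The tail of runOnMachineFunction above drops the separate VirtRegRewriter object: the VirtRegMap itself rewrites every virtual-register operand to its assigned physreg, and only then are the virtual registers cleared, since nothing may still refer to a vreg at that point. Conceptually the rewrite is a map lookup per operand; a minimal sketch over placeholder instruction and operand types:

    #include <map>
    #include <vector>

    struct Operand { unsigned reg; bool isVirtual; };
    struct Instr   { std::vector<Operand> operands; };

    // Replace every virtual-register operand with its assigned physical
    // register; after this the virtual registers can be discarded.
    void rewriteVirtRegs(std::vector<Instr> &code,
                         const std::map<unsigned, unsigned> &virtToPhys) {
      for (unsigned i = 0; i != code.size(); ++i)
        for (unsigned j = 0; j != code[i].operands.size(); ++j) {
          Operand &op = code[i].operands[j];
          if (!op.isVirtual)
            continue;
          op.reg = virtToPhys.at(op.reg);  // every vreg must have an assignment by now
          op.isVirtual = false;
        }
    }
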
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 786d279c2b8c..17165fa72665 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -18,12 +18,16 @@
#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
{}
@@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
- const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
// overlapping CSR.
CSRNum.clear();
CSRNum.resize(TRI->getNumRegs(), 0);
for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (const unsigned *AS = TRI->getOverlaps(Reg);
+ for (const uint16_t *AS = TRI->getOverlaps(Reg);
unsigned Alias = *AS; ++AS)
CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
Update = true;
@@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
for (unsigned i = 0; i != RawOrder.size(); ++i) {
unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
@@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// CSR aliases go after the volatile registers, preserve the target's order.
std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
// Check if RC is a proper sub-class.
if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index 2c1407096cd7..400e1f48ce54 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -49,7 +49,7 @@ class RegisterClassInfo {
// Callee saved registers of last MF. Assumed to be valid until the next
// runOnFunction() call.
- const unsigned *CalleeSaved;
+ const uint16_t *CalleeSaved;
// Map register number to CalleeSaved index + 1;
SmallVector<uint8_t, 4> CSRNum;
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 9b414d6212c7..75f88cafdf01 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regcoalescing"
+#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
#include "RegisterClassInfo.h"
@@ -169,10 +169,6 @@ namespace {
/// it as well.
bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
- /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
- /// VNInfo copy flag for DstReg and all aliases.
- void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
-
/// markAsJoined - Remember that CopyMI has already been joined.
void markAsJoined(MachineInstr *CopyMI);
@@ -197,7 +193,7 @@ namespace {
};
} /// end anonymous namespace
-char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID;
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(StrongPHIEliminationID);
- AU.addPreservedID(PHIEliminationID);
- AU.addPreservedID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
@@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->isDefByCopy()) return false;
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+ if (BValNo->def != CopyIdx) return false;
// AValNo is the value number in A that defines the copy, A3 in the example.
- SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
// The live range might not exist after fun with physreg coalescing.
if (ALR == IntA.end()) return false;
VNInfo *AValNo = ALR->valno;
- // If it's re-defined by an early clobber somewhere in the live range, then
- // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
- // See PR3149:
- // 172 %ECX<def> = MOV32rr %reg1039<kill>
- // 180 INLINEASM <es:subl $5,$1
- // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
- // %EAX<kill>,
- // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
- // 188 %EAX<def> = MOV32rr %EAX<kill>
- // 196 %ECX<def> = MOV32rr %ECX<kill>
- // 204 %ECX<def> = MOV32rr %ECX<kill>
- // 212 %EAX<def> = MOV32rr %EAX<kill>
- // 220 %EAX<def> = MOV32rr %EAX
- // 228 %reg1039<def> = MOV32rr %ECX<kill>
- // The early clobber operand ties ECX input to the ECX def.
- //
- // The live interval of ECX is represented as this:
- // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
- // The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->hasRedefByEC())
- return false;
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
- if (!CP.isCoalescable(AValNo->getCopy()))
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!CP.isCoalescable(ACopyMI))
return false;
// Get the LiveRange in IntB that this value number starts with.
@@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// of its aliases is overlapping the live interval of the virtual register.
// If so, do not coalesce.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
DEBUG({
dbgs() << "\t\tInterfere with alias ";
@@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
- BValNo->def = FillerStart;
- BValNo->setCopy(0);
+ BValNo->def = FillerStart;
// Okay, we can merge them. We need to insert a new liverange:
// [ValLR.end, BLR.begin) of either value number, then we merge the
@@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// If the IntB live range is assigned to a physical register, and if that
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
+ for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &SRLI = LIS->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0,
+ SRLI.getNextValue(FillerStart,
LIS->getVNInfoAllocator())));
}
}
@@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
ValLREndInst->getOperand(UIdx).setIsKill(false);
}
- // If the copy instruction was killing the destination register before the
- // merge, find the last use and trim the live range. That will also add the
- // isKill marker.
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(),
+ *TRI);
if (ALR->end == CopyIdx)
LIS->shrinkToUses(&IntA);
@@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!LIS->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
LiveInterval &IntA =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
- if (!BValNo || !BValNo->isDefByCopy())
+ if (!BValNo || BValNo->def != CopyIdx)
return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
// If other defs can reach uses of this def, then it's not safe to perform
@@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isCommutable())
+ if (!DefMI->isCommutable())
return false;
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
@@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) &&
HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
return false;
@@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
@@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
continue;
}
- SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex();
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
@@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// This copy will become a noop. If it's defining a new val#, merge it into
// BValNo.
- SlotIndex DefIdx = UseIdx.getDefIndex();
+ SlotIndex DefIdx = UseIdx.getRegSlot();
VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
if (!DVNI)
continue;
@@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
- ValNo->setCopy(0);
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
bool preserveSrcInt,
unsigned DstReg,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
@@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isAsCheapAsAMove())
+ if (!DefMI->isAsCheapAsAMove())
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
if (!DefMI->isImplicitDef()) {
@@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- RemoveCopyFlag(DstReg, CopyMI);
-
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI->getDesc().getNumOperands(),
+ e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
e = CopyMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = CopyMI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- if (MO.isDef())
- RemoveCopyFlag(MO.getReg(), CopyMI);
+ if (MO.isReg()) {
+ assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ NewMI->addOperand(MO);
+ }
+ }
}
- NewMI->copyImplicitOps(CopyMI);
LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned reg = NewMIImplDefs[i];
+ LiveInterval &li = LIS->getInterval(reg);
+ VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN);
+ li.addRange(lr);
+ }
+
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
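
The rematerialization path above now collects the implicit physical-register defs that the new instruction carries (the comment gives EFLAGS from a zeroing move as an example) and, once the instruction has a slot index, gives each of those registers a dead def in its live interval so liveness stays consistent. A reduced sketch of creating such dead-def ranges with simplified interval types (slot arithmetic is approximated with plain integers):

    #include <vector>

    struct LiveRange    { int start, end, valNum; };
    struct LiveInterval { std::vector<LiveRange> ranges; int nextValNum = 0; };

    // Give each implicitly defined physreg a short "dead def" range at the
    // index of the newly inserted instruction.
    void addDeadImplicitDefs(std::vector<LiveInterval*> &implicitDefIntervals,
                             int newInstrIndex) {
      for (unsigned i = 0; i != implicitDefIntervals.size(); ++i) {
        LiveInterval *LI = implicitDefIntervals[i];
        LiveRange lr;
        lr.start  = newInstrIndex;       // defined here...
        lr.end    = newInstrIndex + 1;   // ...and dead immediately after
        lr.valNum = LI->nextValNum++;    // fresh value number for the dead def
        LI->ranges.push_back(lr);
      }
    }
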
@@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
DstInt = SrcInt;
SrcInt = 0;
- VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
assert(DeadVNI && "No value defined in DstInt");
DstInt->removeValNo(DeadVNI);
@@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
- bool Kills = false, Deads = false;
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
- Kills |= MO.isKill();
- Deads |= MO.isDead();
// Make sure we don't create read-modify-write defs accidentally. We
// assume here that a SrcReg def cannot be joined into a live DstReg. If
@@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
if (JoinedCopies.count(UseMI))
continue;
- if (SubIdx) {
- // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
- // read-modify-write of DstReg.
- if (Deads)
- UseMI->addRegisterDead(DstReg, TRI);
- else if (!Reads && Writes)
- UseMI->addRegisterDefined(DstReg, TRI);
-
- // Kill flags apply to the whole physical register.
- if (DstIsPhys && Kills)
- UseMI->addRegisterKilled(DstReg, TRI);
- }
-
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
@@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
const TargetRegisterInfo *TRI) {
if (li.empty()) {
if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
+ for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &sli = LIS->getInterval(*SR);
@@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot();
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
@@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
return removeIntervalIfEmpty(li, LIS, TRI);
}
-void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
- const MachineInstr *CopyMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
- if (LIS->hasInterval(DstReg)) {
- LiveInterval &LI = LIS->getInterval(DstReg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
- if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
- return;
- for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) {
- if (!LIS->hasInterval(*AS))
- continue;
- LiveInterval &LI = LIS->getInterval(*AS);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
-}
-
/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
/// We need to be careful about coalescing a source physical register with a
/// virtual register. Once the coalescing is done, it cannot be broken and these
@@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
- // SrcReg is guarateed to be the register whose live interval that is
+ // SrcReg is guaranteed to be the register whose live interval that is
// being merged.
LIS->removeInterval(CP.getSrcReg());
@@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
// FIXME: This is very conservative. For example, we don't handle
// physical registers.
- MachineInstr *MI = VNI->getCopy();
+ MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
- if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys())
return false;
unsigned Dst = MI->getOperand(0).getReg();
@@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
assert(Dst == A);
VNInfo *Other = LR->valno;
- if (!Other->isDefByCopy())
- return false;
- const MachineInstr *OtherMI = Other->getCopy();
+ const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def);
- if (!OtherMI->isFullCopy())
+ if (!OtherMI || !OtherMI->isFullCopy())
return false;
unsigned OtherDst = OtherMI->getOperand(0).getReg();
@@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// than the full interference check below. We allow overlapping live ranges
// only when one is a copy of the other.
if (CP.isPhys()) {
- for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ // Optimization for reserved registers like ESP.
+ // We can only merge with a reserved physreg if RHS has a single value that
+ // is a copy of CP.DstReg(). The live range of the reserved register will
+ // look like a set of dead defs - we don't properly track the live range of
+ // reserved registers.
+ if (RegClassInfo.isReserved(CP.getDstReg())) {
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges to look like dead defs.
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) {
+ if (!LIS->hasInterval(*AS)) {
+ // Make sure at least DstReg itself exists before attempting a join.
+ if (*AS == CP.getDstReg())
+ LIS->getOrCreateInterval(CP.getDstReg());
+ continue;
+ }
+ if (RHS.overlaps(LIS->getInterval(*AS))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n');
+ return false;
+ }
+ }
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+ return true;
+ }
+
+ // Check if a register mask clobbers DstReg.
+ BitVector UsableRegs;
+ if (LIS->checkRegMaskInterference(RHS, UsableRegs) &&
+ !UsableRegs.test(CP.getDstReg())) {
+ DEBUG(dbgs() << "\t\tRegister mask interference.\n");
+ return false;
+ }
+
+ for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
if (!LIS->hasInterval(*AS))
continue;
const LiveInterval &LHS = LIS->getInterval(*AS);
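
The new physreg branch above does two cheap checks before any value-number work: a join into a reserved register is allowed only if the virtual register overlaps none of the intervals of registers overlapping it (reserved ranges are kept only as dead defs, so no merge is attempted at all), and the join is rejected outright if a register mask crossed by the vreg clobbers the destination physreg. The shape of those early-out checks, with placeholder range types standing in for live intervals:

    #include <vector>

    struct Range { int s, e; };

    static bool overlaps(const std::vector<Range> &a,
                         const std::vector<Range> &b) {
      for (unsigned i = 0; i != a.size(); ++i)
        for (unsigned j = 0; j != b.size(); ++j)
          if (a[i].s < b[j].e && b[j].s < a[i].e)
            return true;
      return false;
    }

    // Early checks for joining a vreg into a reserved physreg: a clobbering
    // register mask or any overlap with an aliasing register denies the join;
    // otherwise accept without merging value numbers.
    bool canJoinReserved(const std::vector<Range> &vregLive,
                         const std::vector<std::vector<Range> > &aliasLive,
                         bool clobberedByRegMask) {
      if (clobberedByRegMask)
        return false;
      for (unsigned i = 0; i != aliasLive.size(); ++i)
        if (overlaps(vregLive, aliasLive[i]))
          return false;
      return true;
    }
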
@@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the RHS.
LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the LHS.
LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
- return false;
}
if (I->end < J->end)
@@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
+ DeadDefs.push_back(Reg);
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- DeadDefs.push_back(Reg);
// Remat may also enable register class inflation.
if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
InflateRegs.push_back(Reg);
@@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Check for now unnecessary kill flags.
if (LIS->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
@@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// remain alive.
if (!TargetRegisterInfo::isPhysicalRegister(reg))
continue;
- for (const unsigned *SR = TRI->getSubRegisters(reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(reg);
unsigned S = *SR; ++SR)
if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
MI->addRegisterDefined(S, TRI);
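
A pattern repeated throughout this file is that VNInfo no longer carries a copy flag or a pointer to its defining copy; wherever the old code asked isDefByCopy()/getCopy(), the new code looks the defining instruction up from the value's def slot and tests it directly. Isolated into one helper over a stand-in index-to-instruction map (the struct below is a stub, not LLVM's MachineInstr):

    #include <map>

    struct MachineInstrStub {
      bool copyLike;
      bool isCopyLike() const { return copyLike; }
    };

    typedef std::map<int, MachineInstrStub*> IndexToInstr;

    // "Is this value defined by a copy?" without a cached copy pointer:
    // recover the defining instruction from its def slot and ask it.
    bool definedByCopy(const IndexToInstr &idx2instr, int defSlot,
                       bool isUnused, bool isPHIDef) {
      if (isUnused || isPHIDef)
        return false;
      IndexToInstr::const_iterator it = idx2instr.find(defSlot);
      if (it == idx2instr.end() || it->second == 0)
        return false;                 // e.g. no instruction at that slot
      return it->second->isCopyLike();
    }
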
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 472c48377fef..310b933cab9b 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the abstract interface for register coalescers,
+// This file contains the abstract interface for register coalescers,
// allowing them to interact with and query register allocators.
//
//===----------------------------------------------------------------------===//
@@ -47,7 +47,7 @@ namespace llvm {
/// CrossClass - True when both regs are virtual, and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the oriignal
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index ca02aa1b8143..03bd82e225dc 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
RegsAvailable.reset(SubReg);
}
@@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
if (isUsed(Reg))
return true;
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R)
if (isUsed(*R))
return true;
return false;
@@ -59,9 +59,6 @@ void RegScavenger::initRegState() {
// All registers started out unused.
RegsAvailable.set();
- // Reserved registers are always used.
- RegsAvailable ^= ReservedRegs;
-
if (!MBB)
return;
@@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
"Target changed?");
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
// Self-initialize.
if (!MBB) {
NumPhysRegs = TRI->getNumRegs();
RegsAvailable.resize(NumPhysRegs);
+ KillRegs.resize(NumPhysRegs);
+ DefRegs.resize(NumPhysRegs);
// Create reserved registers bitvector.
ReservedRegs = TRI->getReservedRegs(MF);
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
- const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
if (CSRegs != NULL)
for (unsigned i = 0; CSRegs[i]; ++i)
CalleeSavedRegs.set(CSRegs[i]);
@@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(Reg);
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
- BV.set(*R);
-}
-
-void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
- BV.set(Reg);
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
BV.set(*R);
}
@@ -148,12 +146,12 @@ void RegScavenger::forward() {
// predicated, conservatively assume "kill" markers do not actually kill the
// register. Similarly ignores "dead" markers.
bool isPred = TII->isPredicated(MI);
- BitVector EarlyClobberRegs(NumPhysRegs);
- BitVector KillRegs(NumPhysRegs);
- BitVector DefRegs(NumPhysRegs);
- BitVector DeadRegs(NumPhysRegs);
+ KillRegs.reset();
+ DefRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -164,21 +162,19 @@ void RegScavenger::forward() {
// Ignore undef uses.
if (MO.isUndef())
continue;
- // Two-address operands implicitly kill.
- if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
+ if (!isPred && MO.isKill())
addRegWithSubRegs(KillRegs, Reg);
} else {
assert(MO.isDef());
if (!isPred && MO.isDead())
- addRegWithSubRegs(DeadRegs, Reg);
+ addRegWithSubRegs(KillRegs, Reg);
else
addRegWithSubRegs(DefRegs, Reg);
- if (MO.isEarlyClobber())
- addRegWithAliases(EarlyClobberRegs, Reg);
}
}
// Verify uses and defs.
+#ifndef NDEBUG
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -199,17 +195,18 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
if (isUsed(SubReg)) {
SubUsed = true;
break;
}
- assert(SubUsed && "Using an undefined register!");
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
(void)SubUsed;
}
- assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
- "Using an early clobbered register!");
} else {
assert(MO.isDef());
#if 0
@@ -221,18 +218,20 @@ void RegScavenger::forward() {
#endif
}
}
+#endif // NDEBUG
// Commit the changes.
setUnused(KillRegs);
- setUnused(DeadRegs);
setUsed(DefRegs);
}
void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
if (includeReserved)
- used = ~RegsAvailable;
+ used |= ReservedRegs;
else
- used = ~RegsAvailable & ~ReservedRegs;
+ used.reset(ReservedRegs);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
// Remove any candidates touched by instruction.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
@@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
continue;
}
Candidates.reset(MO.getReg());
- for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++)
Candidates.reset(*R);
}
// If we're not in a virtual reg's live range, this is a valid
@@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// RegsAvailable, as RegsAvailable does not take aliases into account.
// That's what getRegsAvailable() is for.
BitVector Available = getRegsAvailable(RC);
-
- if ((Candidates & Available).any())
- Candidates &= Available;
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
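
The scavenger changes above fold register-mask operands into the existing kill/def bookkeeping: on an ordinary instruction every register whose bit is clear in the mask is treated as killed, on a predicated one as defined, and in findSurvivorReg candidates touched by a mask are dropped via clearBitsNotInMask. Applying a mask to a candidate set looks roughly like this (plain vector<bool> instead of BitVector; a set bit in the mask means "preserved"):

    #include <cstdint>
    #include <vector>

    // Drop every candidate whose bit is NOT set in the register mask, i.e.
    // every register the masked operation (typically a call) clobbers.
    void clearClobberedCandidates(std::vector<bool> &candidates,
                                  const uint32_t *regMask) {
      for (unsigned reg = 0; reg != candidates.size(); ++reg)
        if (!(regMask[reg / 32] & (1u << (reg % 32))))
          candidates[reg] = false;
    }
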
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index 8b02ec44273a..6020908d9112 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===//
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -560,12 +560,13 @@ namespace llvm {
// For uses/defs recorded use/def indexes override current liveness and
// instruction operands (Only for the interval which records the indexes).
- if (i.isUse() || i.isDef()) {
+ // FIXME: This is all wrong, uses and defs share the same slots.
+ if (i.isEarlyClobber() || i.isRegister()) {
UseDefs::const_iterator udItr = useDefs.find(li);
if (udItr != useDefs.end()) {
const SlotSet &slotSet = udItr->second;
if (slotSet.count(i)) {
- if (i.isUse()) {
+ if (i.isEarlyClobber()) {
return Used;
}
// else
@@ -586,9 +587,9 @@ namespace llvm {
return AliveStack;
}
} else {
- if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
return Defined;
- } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
return Used;
} else {
if (vrm == 0 ||
@@ -804,7 +805,7 @@ namespace llvm {
os << indent + s(2) << "<tr height=6ex>\n";
// Render the code column.
- if (i.isLoad()) {
+ if (i.isBlock()) {
MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
mi = sis->getInstructionFromIndex(i);
@@ -823,7 +824,7 @@ namespace llvm {
}
os << indent + s(4) << "</td>\n";
} else {
- i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ i = i.getDeadSlot(); // <- Will be incremented to the next index.
continue;
}
}
@@ -952,10 +953,10 @@ namespace llvm {
rItr != rEnd; ++rItr) {
const MachineInstr *mi = &*rItr;
if (mi->readsRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true));
}
if (mi->definesRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot());
}
}
}
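
The edits in this file are mostly mechanical renames of the SlotIndex interface; the pairs of removed and added lines above imply roughly the following mapping (the last three are approximations rather than exact equivalents, since the added FIXME notes that uses and defs now share the same slots):

    old accessor / predicate      replacement used in this patch
    idx.getUseIndex()             idx.getRegSlot(true)
    idx.getDefIndex()             idx.getRegSlot()
    idx.getStoreIndex()           idx.getDeadSlot()
    i.isUse()                     i.isEarlyClobber()
    i.isDef()                     i.isRegister()
    i.isLoad()                    i.isBlock()
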
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1e9b5c89f172..8fd64265fda6 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -31,6 +31,8 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
+void SchedulingPriorityQueue::anchor() { }
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
@@ -44,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
-/// getInstrDesc helper to handle SDNodes.
-const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
- if (!Node || !Node->isMachineOpcode()) return NULL;
- return &TII->get(Node->getMachineOpcode());
-}
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- SU->dump(this);
- else
- dbgs() << "**** NOOP ****\n";
- }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
- BB = bb;
- InsertPos = insertPos;
-
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
SUnits.clear();
- Sequence.clear();
EntrySU = SUnit();
ExitSU = SUnit();
+}
- Schedule();
-
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
}
/// addPred - This adds the specified edge as a pred of the current node if
@@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
if (I->isAssignedRegDep())
- dbgs() << " Reg=" << G->TRI->getName(I->getReg());
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
@@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
}
#ifndef NDEBUG
-/// VerifySchedule - Verify that all SUnits were scheduled and that
-/// their state is consistent.
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
///
-void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- unsigned Noops = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
if (!SUnits[i].isScheduled) {
if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
@@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
assert(!AnyNotSched);
- assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
- "The number of nodes scheduled doesn't match the expected number!");
+ return SUnits.size() - DeadNodes;
}
#endif
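Since the Sequence vector moves out of the base class, VerifyScheduledDAG now reports how many SUnits were scheduled instead of checking the sequence itself. A hedged sketch of how a derived scheduler that keeps its own emitted order might use the return value (the Sequence vector here is illustrative, not part of this patch):

  #ifndef NDEBUG
  // After emitting the schedule, the verifier's count of scheduled SUnits
  // should match what the scheduler actually emitted.
  unsigned Scheduled = VerifyScheduledDAG(/*isBottomUp=*/true);
  assert(Sequence.size() == Scheduled && "emitted schedule is incomplete");
  #endif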
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
deleted file mode 100644
index f8b1bc76eb8b..000000000000
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the Emit routines for the ScheduleDAG class, which creates
-// MachineInstrs according to the computed schedule.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-void ScheduleDAG::EmitNoop() {
- TII->insertNoop(*BB, InsertPos);
-}
-
-void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
- DenseMap<SUnit*, unsigned> &VRBaseMap) {
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->CopyDstRC) {
- // Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
- assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
- // Find the destination physical register.
- unsigned Reg = 0;
- for (SUnit::const_succ_iterator II = SU->Succs.begin(),
- EE = SU->Succs.end(); II != EE; ++II) {
- if (II->isCtrl()) continue; // ignore chain preds
- if (II->getReg()) {
- Reg = II->getReg();
- break;
- }
- }
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(VRI->second);
- } else {
- // Copy from physical register.
- assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
- bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
- .addReg(I->getReg());
- }
- break;
- }
-}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 34b8ab0b47f2..6be1ab7f5b08 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
-#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -33,25 +34,17 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt)
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()),
- Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
- LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
+ IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
+ FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
-}
-
-/// Run - perform scheduling.
-///
-void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount) {
- BB = bb;
- Begin = begin;
- InsertPosIndex = endcount;
-
- ScheduleDAG::Run(bb, end);
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch()) {
- MachineBasicBlock *Header = ML->getHeader();
- for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
- E = Header->livein_end(); I != E; ++I)
- LoopLiveInRegs.insert(*I);
+ if (BB == ML->getLoopLatch())
LoopRegs.VisitLoop(ML);
- }
}
-/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+void ScheduleDAGInstrs::finishBlock() {
+ // Nothing to do.
+}
+
+/// Initialize the map with the number of registers.
+void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+ PhysRegSet.setUniverse(Limit);
+ SUnits.resize(Limit);
+}
+
+/// Clear the map without deallocating storage.
+void Reg2SUnitsMap::clear() {
+ for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
+ SUnits[*I].clear();
+ }
+ PhysRegSet.clear();
+}
+
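The two functions above are the whole lifecycle of the new physreg maps. A minimal usage sketch, using only calls that appear elsewhere in this diff (setRegLimit, contains, operator[], clear) and assuming TRI, Reg, and SU are in scope as they are in buildSchedGraph:

  Reg2SUnitsMap Uses;
  Uses.setRegLimit(TRI->getNumRegs());   // size the register universe once per graph
  Uses[Reg].push_back(SU);               // record SU as a user of Reg (list created on demand)
  if (Uses.contains(Reg))                // cheap sparse-set membership test
    Uses[Reg].clear();                   // drop the users of a single register
  Uses.clear();                          // drop everything, keep the storage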
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ // Check to see if the scheduler cares about latencies.
+ UnitLatencies = forceUnitLatencies();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
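Taken together, these hooks replace the old Run() entry point with a per-region protocol. A sketch of the call order a driver pass is expected to follow; only the hook names come from this patch, while the Scheduler object and the schedule() call are illustrative:

  Scheduler.startBlock(MBB);
  // for each scheduling region [RegionBegin, RegionEnd) in MBB:
  Scheduler.enterRegion(MBB, RegionBegin, RegionEnd, EndIndex);
  Scheduler.buildSchedGraph(AA);   // may be invoked more than once per region
  Scheduler.schedule();            // scheduler-specific ordering/emission
  Scheduler.exitRegion();
  // end for
  Scheduler.finishBlock();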
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
 /// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
@@ -153,11 +185,11 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
/// especially important when the definition latency of the return value(s)
 /// is too high to be hidden by the branch or when the liveout registers are
 /// used by instructions in the fallthrough block.
-void ScheduleDAGInstrs::AddSchedBarrierDeps() {
- MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
- (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ (ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// instruction.
@@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
- Uses[Reg].push_back(&ExitSU);
+ if (TRI->isPhysicalRegister(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ else {
+ assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ addVRegUseDeps(&ExitSU, i);
+ }
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- SmallSet<unsigned, 8> Seen;
+ assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- if (Seen.insert(Reg))
+ if (!Uses.contains(Reg))
Uses[Reg].push_back(&ExitSU);
}
}
}
-void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
- // We'll be allocating one SUnit for each instruction, plus one for
- // the region exit node.
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
+ const MachineOperand &MO) {
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ unsigned DataLatency = SU->Latency;
+
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ std::vector<SUnit*> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if (RegUseIndex >= 0 &&
+ (UseMI->mayLoad() || UseMI->mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ if (!UnitLatencies) {
+ computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ }
+}
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend on the physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+ else {
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses[MO.getReg()].push_back(SU);
+ }
+ else {
+ addPhysRegDataDeps(SU, MO);
+
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's defs.
+ std::vector<SUnit *> &DefList = Defs[MO.getReg()];
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const TargetSubtargetInfo &ST =
+ TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseMCID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ // clear this register's use list
+ if (Uses.contains(MO.getReg()))
+ Uses[MO.getReg()].clear();
+
+ if (!MO.isDead())
+ DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
+ // Defs are pushed in the order they are visited and never reordered.
+ DefList.push_back(SU);
+ }
+}
+
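A standalone sketch of the call-pruning rule above: because pushing a new call first pops any calls already at the back of the list, DefList never accumulates a run of call entries, so later dependence scans stay roughly linear in the number of non-call defs. Names here are illustrative, not LLVM types:

  #include <vector>
  struct FakeSU { bool isCall; };
  static void pushDef(std::vector<FakeSU*> &DefList, FakeSU *SU) {
    if (SU->isCall)
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();        // drop the previous trailing call(s)
    DefList.push_back(SU);         // defs are otherwise never reordered
  }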
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // SSA defs do not have output/anti dependencies.
+ // The current operand is a def, so we have at least one.
+ if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+ return;
+
+ // Add output dependence to the next nearest def of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI == VRegDefs.end())
+ VRegDefs.insert(VReg2SUnit(Reg, SU));
+ else {
+ SUnit *DefSU = DefI->SU;
+ if (DefSU != SU && DefSU != &ExitSU) {
+ unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+ }
+ DefI->SU = SU;
+ }
+}
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Lookup this operand's reaching definition.
+ assert(LIS && "vreg dependencies requires LiveIntervals");
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval *LI = &LIS->getInterval(Reg);
+ VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+ // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+ // Phis and other noninstructions (after coalescing) have a NULL Def.
+ if (Def) {
+ SUnit *DefSU = getSUnit(Def);
+ if (DefSU) {
+ // The reaching Def lives within this scheduling region.
+ // Create a data dependence.
+ //
+ // TODO: Handle "special" address latencies cleanly.
+ const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ if (!UnitLatencies) {
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+ }
+ SU->addPred(dep);
+ }
+ }
+
+ // Add antidependence to the following def of the vreg it uses.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI != VRegDefs.end() && DefI->SU != SU)
+ DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down topological
+/// order. An instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
SUnits.reserve(BB->size());
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(MI);
+ MISUnitMap[MI] = SU;
+
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ computeLatency(SU);
+ }
+}
+
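The "cannot be resized" requirement above comes down to vector reallocation: as long as SUnits is reserved up front and never grows past its capacity, cached element pointers stay valid. A self-contained illustration with standard containers and hypothetical names:

  #include <cassert>
  #include <vector>
  struct FakeSUnit { unsigned NodeNum; };
  int main() {
    std::vector<FakeSUnit> SUnits;
    SUnits.reserve(100);                 // upper bound: instructions in the region
    const FakeSUnit *First = nullptr;
    for (unsigned i = 0; i != 100; ++i) {
      SUnits.push_back(FakeSUnit{i});    // NodeNum == position, as initSUnits does
      if (!First) First = &SUnits[0];
      assert(First == &SUnits[0]);       // no reallocation while within capacity
    }
    return 0;
  }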
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
// We build scheduling units by walking a block's instruction list from bottom
// to top.
@@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
- // Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
FirstDbgValue = NULL;
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setRegLimit(TRI->getNumRegs());
+ Uses.setRegLimit(TRI->getNumRegs());
+
+ assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ // FIXME: Allow SparseSet to reserve space for the creation of virtual
+ // registers during scheduling. Don't artificially inflate the Universe
+ // because we want to assert that vregs are not created during DAG building.
+ VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
// Model data dependencies between instructions being scheduled and the
// ExitSU.
- AddSchedBarrierDeps();
-
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
- }
+ addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
MachineInstr *PrevMI = NULL;
- for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
if (MI && PrevMI) {
@@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
- assert(!MCID.isTerminator() && !MI->isLabel() &&
+ assert(!MI->isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
- // Create the SUnit for this MI.
- SUnit *SU = NewSUnit(MI);
- SU->isCall = MCID.isCall();
- SU->isCommutable = MCID.isCommutable();
- // Assign the Latency field of SU using target-provided information.
- if (UnitLatencies)
- SU->Latency = 1;
- else
- ComputeLatency(SU);
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
// Add register-based dependencies (data, anti, and output).
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
-
- std::vector<SUnit *> &UseList = Uses[Reg];
- // Defs are push in the order they are visited and never reordered.
- std::vector<SUnit *> &DefList = Defs[Reg];
- // Optionally add output and anti dependencies. For anti
- // dependencies we use a latency of 0 because for a multi-issue
- // target we want to allow the defining instruction to issue
- // in the same cycle as the using instruction.
- // TODO: Using a latency of 1 here for output dependencies assumes
- // there's no cost for reusing registers.
- SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &MemDefList = Defs[*Alias];
- for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
- SUnit *DefSU = MemDefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(*Alias)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
- }
- }
-
- if (MO.isDef()) {
- // Add any data dependencies.
- unsigned DataLatency = SU->Latency;
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Do this for register aliases too.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies &&
- UseSU != &ExitSU) {
- MachineInstr *UseMI = UseSU->getInstr();
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
- assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
- if (RegUseIndex >= 0 &&
- (UseMCID.mayLoad() || UseMCID.mayStore()) &&
- (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
- UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- }
-
- // If a def is going to wrap back around to the top of the loop,
- // backschedule it.
- if (!UnitLatencies && DefList.empty()) {
- LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
- if (I != LoopRegs.Deps.end()) {
- const MachineOperand *UseMO = I->second.first;
- unsigned Count = I->second.second;
- const MachineInstr *UseMI = UseMO->getParent();
- unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- // TODO: If we knew the total depth of the region here, we could
- // handle the case where the whole loop is inside the region but
- // is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseMCID.getNumOperands()) {
- // Currently, we only support scheduling regions consisting of
- // single basic blocks. Check to see if the instruction is in
- // the same region by checking to see if it has the same parent.
- if (UseMI->getParent() != MI->getParent()) {
- unsigned Latency = SU->Latency;
- if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
- Latency += SpecialAddressLatency;
- // This is a wild guess as to the portion of the latency which
- // will be overlapped by work done outside the current
- // scheduling region.
- Latency -= std::min(Latency, Count);
- // Add the artificial edge.
- ExitSU.addPred(SDep(SU, SDep::Order, Latency,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- } else if (SpecialAddressLatency > 0 &&
- UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
- // The entire loop body is within the current scheduling region
- // and the latency of this operation is assumed to be greater
- // than the latency of the loop.
- // TODO: Recursively mark data-edge predecessors as
- // isScheduleHigh too.
- SU->isScheduleHigh = true;
- }
- }
- LoopRegs.Deps.erase(I);
- }
- }
-
- UseList.clear();
- if (!MO.isDead())
- DefList.clear();
-
- // Calls will not be reordered because of chain dependencies (see
- // below). Since call operands are dead, calls may continue to be added
- // to the DefList making dependence checking quadratic in the size of
- // the block. Instead, we leave only one call at the back of the
- // DefList.
- if (SU->isCall) {
- while (!DefList.empty() && DefList.back()->isCall)
- DefList.pop_back();
- }
- DefList.push_back(SU);
- } else {
- UseList.push_back(SU);
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ assert(!IsPostRA && "Virtual register encountered!");
+ if (MO.isDef())
+ addVRegDefDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
}
}
@@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (MCID.mayStore()) {
+ } else if (MI->mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (MCID.mayLoad()) {
+ } else if (MI->mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
@@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (PrevMI)
FirstDbgValue = PrevMI;
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- Defs[i].clear();
- Uses[i].clear();
- }
+ Defs.clear();
+ Uses.clear();
+ VRegDefs.clear();
PendingLoads.clear();
}
-void ScheduleDAGInstrs::FinishBlock() {
- // Nothing to do.
-}
-
-void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
// Compute the latency for the node.
if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
// extra time.
- if (SU->getInstr()->getDesc().mayLoad())
+ if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
}
}
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
if (!InstrItins || InstrItins->isEmpty())
return;
@@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
// %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
// What we want is to compute latency between def of %D6/%D7 and use of
// %Q3 instead.
- DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
}
MachineInstr *UseMI = Use->getInstr();
   // For all uses of the register, calculate the maximum latency
@@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
return oss.str();
}
-// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- // For MachineInstr-based scheduling, we're rescheduling the instructions in
- // the block, so start by removing them from the block.
- while (Begin != InsertPos) {
- MachineBasicBlock::iterator I = Begin;
- ++Begin;
- BB->remove(I);
- }
-
- // If first instruction was a DBG_VALUE then put it back.
- if (FirstDbgValue)
- BB->insert(InsertPos, FirstDbgValue);
-
- // Then re-insert them according to the given schedule.
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- BB->insert(InsertPos, SU->getInstr());
- else
- // Null SUnit* is a noop.
- EmitNoop();
- }
-
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (!Sequence.empty())
- Begin = Sequence[0]->getInstr();
-
- // Reinsert any remaining debug_values.
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
- DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
- std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
- MachineInstr *DbgValue = P.first;
- MachineInstr *OrigPrivMI = P.second;
- BB->insertAfter(OrigPrivMI, DbgValue);
- }
- DbgValues.clear();
- FirstDbgValue = NULL;
- return BB;
+/// Return the basic block label. It is not necessarily unique because a block
+/// may contain multiple scheduling regions, but it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
deleted file mode 100644
index 666bdf548c71..000000000000
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ /dev/null
@@ -1,212 +0,0 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ScheduleDAGInstrs class, which implements
-// scheduling for a MachineInstr-based dependency graph.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SCHEDULEDAGINSTRS_H
-#define SCHEDULEDAGINSTRS_H
-
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include <map>
-
-namespace llvm {
- class MachineLoopInfo;
- class MachineDominatorTree;
-
- /// LoopDependencies - This class analyzes loop-oriented register
- /// dependencies, which are used to guide scheduling decisions.
- /// For example, loop induction variable increments should be
- /// scheduled as soon as possible after the variable's last use.
- ///
- class LLVM_LIBRARY_VISIBILITY LoopDependencies {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
-
- public:
- typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
- LoopDeps;
- LoopDeps Deps;
-
- LoopDependencies(const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt) :
- MLI(mli), MDT(mdt) {}
-
- /// VisitLoop - Clear out any previous state and analyze the given loop.
- ///
- void VisitLoop(const MachineLoop *Loop) {
- assert(Deps.empty() && "stale loop dependencies");
-
- MachineBasicBlock *Header = Loop->getHeader();
- SmallSet<unsigned, 8> LoopLiveIns;
- for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
- LE = Header->livein_end(); LI != LE; ++LI)
- LoopLiveIns.insert(*LI);
-
- const MachineDomTreeNode *Node = MDT.getNode(Header);
- const MachineBasicBlock *MBB = Node->getBlock();
- assert(Loop->contains(MBB) &&
- "Loop does not contain header!");
- VisitRegion(Node, MBB, Loop, LoopLiveIns);
- }
-
- private:
- void VisitRegion(const MachineDomTreeNode *Node,
- const MachineBasicBlock *MBB,
- const MachineLoop *Loop,
- const SmallSet<unsigned, 8> &LoopLiveIns) {
- unsigned Count = 0;
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (LoopLiveIns.count(MOReg))
- Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
- }
- ++Count; // Not every iteration due to dbg_value above.
- }
-
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- for (std::vector<MachineDomTreeNode*>::const_iterator I =
- Children.begin(), E = Children.end(); I != E; ++I) {
- const MachineDomTreeNode *ChildNode = *I;
- MachineBasicBlock *ChildBlock = ChildNode->getBlock();
- if (Loop->contains(ChildBlock))
- VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
- }
- }
- };
-
- /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
- /// MachineInstrs.
- class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
- const MachineFrameInfo *MFI;
- const InstrItineraryData *InstrItins;
-
- /// Defs, Uses - Remember where defs and uses of each physical register
- /// are as we iterate upward through the instructions. This is allocated
- /// here instead of inside BuildSchedGraph to avoid the need for it to be
- /// initialized and destructed for each block.
- std::vector<std::vector<SUnit *> > Defs;
- std::vector<std::vector<SUnit *> > Uses;
-
- /// PendingLoads - Remember where unknown loads are after the most recent
- /// unknown store, as we iterate. As with Defs and Uses, this is here
- /// to minimize construction/destruction.
- std::vector<SUnit *> PendingLoads;
-
- /// LoopRegs - Track which registers are used for loop-carried dependencies.
- ///
- LoopDependencies LoopRegs;
-
- /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
- /// back-edge-aware scheduling.
- ///
- SmallSet<unsigned, 8> LoopLiveInRegs;
-
- protected:
-
- /// DbgValues - Remember instruction that preceeds DBG_VALUE.
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
- DbgValueVector;
- DbgValueVector DbgValues;
- MachineInstr *FirstDbgValue;
-
- public:
- MachineBasicBlock::iterator Begin; // The beginning of the range to
- // be scheduled. The range extends
- // to InsertPos.
- unsigned InsertPosIndex; // The index in BB of InsertPos.
-
- explicit ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt);
-
- virtual ~ScheduleDAGInstrs() {}
-
- /// NewSUnit - Creates a new SUnit and return a ptr to it.
- ///
- SUnit *NewSUnit(MachineInstr *MI) {
-#ifndef NDEBUG
- const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
-#endif
- SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
-
- /// Run - perform scheduling.
- ///
- void Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endindex);
-
- /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
- /// input.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
-
- /// AddSchedBarrierDeps - Add dependencies from instructions in the current
- /// list of instructions being scheduled to scheduling barrier. We want to
- /// make sure instructions which define registers that are either used by
- /// the terminator or are live-out are properly scheduled. This is
- /// especially important when the definition latency of the return value(s)
- /// are too high to be hidden by the branch or when the liveout registers
- /// used by instructions in the fallthrough block.
- void AddSchedBarrierDeps();
-
- /// ComputeLatency - Compute node latency.
- ///
- virtual void ComputeLatency(SUnit *SU);
-
- /// ComputeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const;
-
- virtual MachineBasicBlock *EmitSchedule();
-
- /// StartBlock - Prepare to perform scheduling in the given block.
- ///
- virtual void StartBlock(MachineBasicBlock *BB);
-
- /// Schedule - Order nodes according to selected style, filling
- /// in the Sequence member.
- ///
- virtual void Schedule() = 0;
-
- /// FinishBlock - Clean up after scheduling in the given block.
- ///
- virtual void FinishBlock();
-
- virtual void dumpNode(const SUnit *SU) const;
-
- virtual std::string getGraphNodeLabel(const SUnit *SU) const;
- };
-}
-
-#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4b55a2284f85..38feee95a58e 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
#include <fstream>
using namespace llvm;
@@ -42,12 +41,12 @@ namespace llvm {
static bool renderGraphFromBottomUp() {
return true;
}
-
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
}
-
+
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
static std::string getEdgeAttributes(const SUnit *Node,
@@ -59,7 +58,7 @@ namespace llvm {
return "color=blue,style=dashed";
return "";
}
-
+
std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
@@ -82,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
#ifndef NDEBUG
- if (BB->getBasicBlock())
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
- ":" + BB->getBasicBlock()->getNameStr());
- else
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+ ViewGraph(this, Name, false, Title);
#else
errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b80c01ed58b9..3d22035974da 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
@@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 2282f0e6eb83..a6bdc3be32e0 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
+ ResourcePriorityQueue.cpp
ScheduleDAGFast.cpp
- ScheduleDAGList.cpp
ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
+ SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b878688df63..d1b998f8d840 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22,7 +22,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +63,24 @@ namespace {
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
- std::vector<SDNode*> WorkList;
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+    // maintain the order in which they should be visited.
+    //
+    // The vector maintains the order in which nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
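The scheme described in the comment above (a set for membership, a vector for order, lazily skipping stale entries on pop) can be shown with standard containers. A minimal sketch of the idea, not the DAGCombiner code itself:

  #include <cassert>
  #include <set>
  #include <vector>
  struct WorkList {
    std::set<int> Contents;        // what should currently be visited
    std::vector<int> Order;        // visit order; may hold stale duplicates
    void add(int N) { Contents.insert(N); Order.push_back(N); }
    void remove(int N) { Contents.erase(N); }
    bool empty() const { return Contents.empty(); }
    int pop() {
      int N;
      do {                         // discard entries removed since being pushed
        N = Order.back();
        Order.pop_back();
      } while (!Contents.erase(N));
      return N;
    }
  };
  int main() {
    WorkList WL;
    WL.add(1); WL.add(2); WL.add(1);   // re-adding 1 leaves a duplicate in Order
    WL.remove(2);
    assert(WL.pop() == 1);             // the removed 2 and the stale 1 are never visited
    assert(WL.empty());
    return 0;
  }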
@@ -84,18 +100,17 @@ namespace {
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure it's instance is at the
- /// the back (next to be processed.)
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+ /// back (next to be processed.)
void AddToWorkList(SDNode *N) {
- removeFromWorkList(N);
- WorkList.push_back(N);
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
- WorkList.end());
+ WorkListContents.erase(N);
}
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
@@ -159,7 +174,9 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
@@ -181,7 +198,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -279,7 +298,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
@@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
}
}
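The UnsafeFPMath guard matters because IEEE signed zeros make -(A - B) and (B - A) observably different when A == B. A standalone check of that corner case:

  #include <cmath>
  #include <cstdio>
  int main() {
    double A = 0.0, B = 0.0;
    double Negated = -(A - B);   // -(+0.0) == -0.0
    double Swapped = B - A;      // +0.0
    std::printf("%d %d\n", std::signbit(Negated), std::signbit(Swapped)); // prints: 1 0
    return 0;
  }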
@@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
- LegalOperations = Level >= NoIllegalOperations;
- LegalTypes = Level >= NoIllegalTypes;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- WorkList.reserve(DAG.allnodes_size());
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- WorkList.push_back(I);
+ AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
- // while the worklist isn't empty, inspect the node on the end of it and
+ // while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkList.empty()) {
- SDNode *N = WorkList.back();
- WorkList.pop_back();
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+    // worklist *should* contain, and check that the node we want to visit
+    // should actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
@@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
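The fold above rests on a small identity: if no bit position can be set in both operands, addition never carries, so a + b == (a | b). A quick self-contained check with example masks:

  #include <cassert>
  #include <cstdint>
  int main() {
    uint32_t a = 0xAB00;          // low byte known zero
    uint32_t b = 0x00CD;          // upper bytes known zero
    assert((a & b) == 0);         // the possibly-set bits are disjoint
    assert(a + b == (a | b));     // so the ADD can be rewritten as an OR
    return 0;
  }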
@@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
- if (N->hasNUsesOfValue(0, 1))
- return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
@@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getSExtValue() == 1LL)
+ if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
@@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
- (isPowerOf2_64(N1C->getSExtValue()) ||
- isPowerOf2_64(-N1C->getSExtValue()))) {
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
- int64_t pow2 = N1C->getSExtValue();
- int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
- unsigned lg2 = Log2_64(abs2);
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
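+ // For example, an i32 sdiv by 8 (lg2 == 3) becomes, roughly:
+ //   sgn = sra X, 31;  add = add X, (srl sgn, 29);  res = sra add, 3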
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
@@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
- if (pow2 > 0)
+ if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
@@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
- !TLI.isIntDivCheap()) {
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
@@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ && Level == AfterLegalizeVectorOps) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
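+ // For example: (and (shuffle A, undef, M), (shuffle B, undef, M))
+ //                -> (shuffle (and A, B), undef, M)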
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
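+ // For example, (and (i32 (zextload i8 p)), 255) can drop the 'and' entirely,
+ // since the zero-extending load already clears bits 8-31.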
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
@@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero & Mask;
+ APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
}
@@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return SDValue();
}
+// isTruncateOf - If N is a truncate of some other value, return true and record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
N0.getOperand(0));
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
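+ // For example, if x is an i32 whose top 24 bits are known zero, then
+ //   (zext i32 (trunc i8 x)) -> x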
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
else
Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
@@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
- false, false, Align);
+ false, false, false, Align);
}
return SDValue();
@@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- OrigAlign);
+ LN0->isInvariant(), OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
@@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
@@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
@@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
return SDValue();
}
@@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
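+ // For example, (fdiv X, 2.0) -> (fmul X, 0.5), provided 0.5 is a legal FP
+ // immediate (or ConstantFP is legal) for this type.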
+ if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST &&
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode.
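+/// For example, on a target where [reg + imm] addressing is legal, an
+/// (add base, 16) whose only use is a load or store address can typically be
+/// folded into that access, so rewriting it into a pre/post-indexed form is
+/// not profitable.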
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
@@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
- if (!((Use->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
- (Use->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
@@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
continue;
// Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr.
+ // 1) All uses are load / store ops that use it as base ptr (and
+ //    it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
@@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
- if (!((UseUse->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
- (UseUse->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) {
+ if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
@@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
@@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
@@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- NewAlign);
+ LD->isInvariant(), NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
@@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
- false, false, LDAlign);
+ false, false, false, LDAlign);
SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
NewLD, ST->getBasePtr(),
@@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return InOp;
}
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD patterns.
+ // For example, on AVX this could mean extracting elements from a wide vector
+ // without using extract_subvector.
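+ // For example:
+ //   (extract_vector_elt (vector_shuffle A, B, <3,1,u,u>), 0)
+ //     -> (extract_vector_elt A, 3)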
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, MVT::i32));
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
- SDValue EltNo = N->getOperand(1);
- if (isa<ConstantSDNode>(EltNo)) {
+ if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+ // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
@@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BITCAST)
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
InVec = InVec.getOperand(0);
+ }
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
@@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getConstant(PtrOff, PtrType));
}
- return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
}
return SDValue();
@@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
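+ // For example, on a little-endian target,
+ //   (build_vector (zext i8 a to i32), (zext i8 b to i32))
+ // can be rebuilt as a bitcast of
+ //   (v8i8 build_vector a, 0, 0, 0, b, 0, 0, 0)
+ // with zero used as the filler value.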
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+ bool AllUndef = true;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+ AllUndef = false;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = N->getValueType(0).getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization any earlier may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+ ValidTypes) {
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i=0; i < N->getNumOperands(); ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ }
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
@@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
break;
}
- // If the input vector type disagrees with the result of the build_vector,
- // we can't make a shuffle.
+ // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
- if (ExtractedFromVec.getValueType() != VT) {
- VecIn1 = VecIn2 = SDValue(0, 0);
- break;
- }
-
- // Otherwise, remember this. We allow up to two distinct input vectors.
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
@@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Mask.push_back(Idx+NumInScalars);
}
- // Add count and size info.
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+ // We don't support shuffling between two values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
- Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
@@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
@@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- assert(N0.getValueType().getVectorNumElements() == NumElts &&
- "Vector shuffle must be normalized in DAG");
+ // Canonicalize shuffle v, v -> v, undef
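+ // e.g. (shuffle A, A, <0,5,2,7>) -> (shuffle A, undef, <0,1,2,3>)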
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
- // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
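+ // e.g. (shuffle (shuffle X, undef, <1,0,3,2>), undef, <1,0,3,2>) -> X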
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
return SDValue();
}
@@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
- else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);
@@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
EVT VT = LHSOp.getValueType();
- assert(RHSOp.getValueType() == VT &&
- "SimplifyVBinOp with different BUILD_VECTOR element types");
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
@@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) &&
(LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
- (LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))))
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
@@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// FIXME: Discards pointer info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->getAlignment());
+ LLD->isInvariant(), LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
@@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
- false, Alignment);
+ false, false, Alignment);
}
}
@@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- N0.getValueType().isInteger() &&
- N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
@@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- Ptr = LD->getBasePtr();
- Size = LD->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = LD->getSrcValue();
- SrcValueOffset = LD->getSrcValueOffset();
- SrcValueAlign = LD->getOriginalAlignment();
- TBAAInfo = LD->getTBAAInfo();
- return true;
- }
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- Ptr = ST->getBasePtr();
- Size = ST->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = ST->getSrcValue();
- SrcValueOffset = ST->getSrcValueOffset();
- SrcValueAlign = ST->getOriginalAlignment();
- TBAAInfo = ST->getTBAAInfo();
- return false;
- }
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
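The rewritten FindAliasInfo leans on loads and stores sharing the LSBaseSDNode base class, so one cast replaces the two dyn_cast branches. A toy sketch of that shape with hypothetical stand-in types (MemNode/LoadNode/StoreNode are not LLVM classes):

#include <cstdio>

// Hypothetical stand-ins for LoadSDNode/StoreSDNode sharing a base class that
// already exposes the fields both branches used to extract separately.
struct MemNode { int BasePtr; unsigned Size; bool IsLoad; };
struct LoadNode : MemNode { LoadNode(int P, unsigned S) : MemNode{P, S, true} {} };
struct StoreNode : MemNode { StoreNode(int P, unsigned S) : MemNode{P, S, false} {} };

// Before: dyn_cast to the load type, then dyn_cast to the store type,
// duplicating the field extraction. After: one cast to the common base plus a
// load/store check for the return value -- the shape FindAliasInfo now has.
static bool findAliasInfo(const MemNode &N, int &Ptr, unsigned &Size) {
  Ptr = N.BasePtr;
  Size = N.Size;
  return N.IsLoad;   // stands in for isa<LoadSDNode>(LS)
}

int main() {
  StoreNode ST(42, 8);
  int Ptr; unsigned Size;
  bool WasLoad = findAliasInfo(ST, Ptr, Size);
  std::printf("ptr=%d size=%u load=%d\n", Ptr, Size, (int)WasLoad);
}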
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e8f8c73d6883..0c1ac6982d2a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "isel"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -58,8 +59,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
///
@@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
@@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return 0;
}
- // Look up the value to see if we already have a register for it. We
- // cache values defined by Instructions across blocks, and other values
- // only locally. This is because Instructions already have the SSA
- // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end())
- return I->second;
-
- unsigned Reg = LocalValueMap[V];
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
if (Reg != 0)
return Reg;
@@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
@@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::SRA;
}
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (ResultReg == 0) return false;
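The transform added above relies on the identity that an unsigned remainder by a power of two equals a bitwise AND with that power of two minus one. A self-contained check of the identity (isPowerOf2 mirrors what isPowerOf2_64 tests; the values are arbitrary):

#include <cstdint>
#include <cassert>
#include <cstdio>

// Exactly one bit set.
static bool isPowerOf2(uint64_t V) { return V && (V & (V - 1)) == 0; }

int main() {
  // urem x, pow2  ==  and x, pow2-1  for unsigned x.
  uint64_t X = 0xDEADBEEFULL, Pow2 = 64;
  assert(isPowerOf2(Pow2));
  uint64_t Rem = X % Pow2;
  uint64_t Masked = X & (Pow2 - 1);
  std::printf("rem=%llu masked=%llu equal=%d\n",
              (unsigned long long)Rem, (unsigned long long)Masked,
              (int)(Rem == Masked));
}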
@@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
- // FIXME: This can be optimized by combining the add with a
- // subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
- NIsKill = true;
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
}
Ty = StTy->getElementType(Field);
} else {
@@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ // N = N + Offset
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
- continue;
+ TotalOffs = 0;
}
// N = N + Idx * ElementSize;
@@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return false;
}
}
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, N);
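Rather than emitting one add per constant GEP index, the loop now accumulates TotalOffs and emits a single add, flushing early once the running total reaches MaxOffs. A sketch that simply counts how many adds such coalescing would emit for a run of constant offsets (the offsets and threshold are made up for illustration):

#include <cstdint>
#include <cstdio>

// Count how many "N = N + Offset" instructions are emitted when folding a run
// of constant offsets into one running total, flushing once it reaches MaxOffs.
static unsigned emitCoalescedAdds(const uint64_t *Offs, unsigned NumOffs,
                                  uint64_t MaxOffs) {
  unsigned Emitted = 0;
  uint64_t TotalOffs = 0;
  for (unsigned i = 0; i != NumOffs; ++i) {
    TotalOffs += Offs[i];
    if (TotalOffs >= MaxOffs) {   // Flush the running total early.
      ++Emitted;
      TotalOffs = 0;
    }
  }
  if (TotalOffs)                  // Final flush for whatever is left.
    ++Emitted;
  return Emitted;
}

int main() {
  // Four constant field/element offsets that previously cost four adds.
  uint64_t Offs[] = {8, 16, 4, 24};
  std::printf("adds emitted: %u (was 4)\n",
              emitCoalescedAdds(Offs, 4, /*MaxOffs=*/2048));
}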
@@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
switch (F->getIntrinsicID()) {
default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
- !FuncInfo.MF->getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
const Value *Address = DI->getAddress();
- if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
unsigned Reg = 0;
unsigned Offset = 0;
@@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) {
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
- Reg = getRegForValue(Address);
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
if (Reg)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset)
.addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+        DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
case Intrinsic::dbg_value: {
@@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) {
}
return true;
}
- case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
- break;
-
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(Call, ResultReg);
- return true;
- }
- case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
- break;
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(Call);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(Call);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
-
- UpdateValueMap(Call, ResultReg);
-
- return true;
- }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
- TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
@@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // The unconditional fall-through case, which needs no instructions.
+
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+    // For more accurate line information, if this is the only instruction in
+    // the block then we emit it; otherwise this is the unconditional
+    // fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
@@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b052740a1abe..8dde919079d9 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
+ Fn->isVarArg(),
Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
@@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// candidate. I.e., it would trigger the creation of a stack protector.
bool MayNeedSP =
(AI->isArrayAllocation() ||
- (TySize > 8 && isa<ArrayType>(Ty) &&
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// argument. This overrides previous frame index entry for this argument,
/// if any.
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
- int FI) {
+ int FI) {
ByValArgFrameIndexMap[A] = FI;
}
@@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return 0;
}
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
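ComputeUsesVAFloatArgument walks each argument type and everything it contains, setting the flag as soon as any floating-point component turns up. A toy recursive version of that type walk over a hypothetical Ty node (a stand-in for llvm::Type, not the real class):

#include <vector>
#include <cstdio>

// Toy stand-in for llvm::Type: a node is either a floating-point leaf or an
// aggregate of contained types.
struct Ty {
  bool IsFP = false;
  std::vector<const Ty *> Contained;
};

// Depth-first walk over a type and everything it contains -- the same question
// the po_iterator<Type*> loop answers in ComputeUsesVAFloatArgument.
static bool containsFP(const Ty *T) {
  if (T->IsFP)
    return true;
  for (const Ty *Sub : T->Contained)
    if (containsFP(Sub))
      return true;
  return false;
}

int main() {
  Ty F;      F.IsFP = true;                 // float
  Ty I;                                     // i32
  Ty Struct; Struct.Contained = {&I, &F};   // { i32, float }
  std::printf("needs _fltused: %d\n", (int)containsFP(&Struct));
}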
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
@@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- SmallPtrSet<const BasicBlock*, 4> Visited;
-
- // The 'eh.selector' call may not be in the direct successor of a basic block,
- // but could be several successors deeper. If we don't find it, try going one
- // level further. <rdar://problem/8824861>
- while (Visited.insert(SuccBB)) {
- for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to LPad.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SuccBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- return;
- }
-
- const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
- if (Br && Br->isUnconditional())
- SuccBB = Br->getSuccessor(0);
- else
- break;
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8f8715..1467d887789c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
TGA->getTargetFlags()));
@@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
- unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (SRC && SRC != RC) {
- MRI->setRegClass(NewVReg, SRC);
- RC = SRC;
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
}
AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
@@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
- // The MachineInstr constructor adds implicit-def operands. Scan through
- // these to determine which are dead.
- if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
- // First, collect all used registers.
- SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
- if (F->getOpcode() == ISD::CopyFromReg)
- UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
- else {
- // Collect declared implicit uses.
- const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
- // In addition to declared implicit uses, we must also check for
- // direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- UsedRegs.push_back(Reg);
- }
- }
- // Then mark unused registers as dead.
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
- }
-
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
@@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Glue value, for the benefit of targets still using
- // Glue for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- MI->addRegisterDead(Reg, TRI);
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
}
}
- // If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any glue outputs, we don't do this
- // because we don't know what implicit defs are being used by glued nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- if (const unsigned *IDList = II.getImplicitDefs()) {
- for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
- i != e; ++i)
- MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
}
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
@@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->dump();
#endif
llvm_unreachable("This target-independent node should have been selected!");
- break;
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
- break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
new file mode 100644
index 000000000000..81d2e000a2e8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SelectionDAG
+parent = CodeGen
+required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 63255ae2ebd9..a96a99781f4e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -46,37 +46,18 @@ using namespace llvm;
/// will attempt to merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- // Libcall insertion helpers.
-
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
- /// legalized. We use this to ensure that calls are properly serialized
- /// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
-
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
- void AddLegalizedOperand(SDValue From, SDValue To) {
- LegalizedNodes.insert(std::make_pair(From, To));
- // If someone requests legalization of the new node, return itself.
- if (From != To)
- LegalizedNodes.insert(std::make_pair(To, To));
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
- // Transfer SDDbgValues.
- DAG.TransferDbgValues(From, To);
- }
+ // Libcall insertion helpers.
public:
explicit SelectionDAGLegalize(SelectionDAG &DAG);
@@ -84,9 +65,8 @@ public:
void LegalizeDAG();
private:
- /// LegalizeOp - Return a legal replacement for the given operation, with
- /// all legal operands.
- SDValue LegalizeOp(SDValue O);
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -105,10 +85,7 @@ private:
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const;
-
- bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+ ArrayRef<int> Mask) const;
void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
@@ -150,10 +127,46 @@ private:
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N, this);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
};
}
@@ -164,7 +177,7 @@ private:
SDValue
SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const {
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
-
- // The legalize process is inherently a bottom-up recursive process (users
- // legalize their uses before themselves). Given infinite stack space, we
- // could just start legalizing on the root and traverse the whole graph. In
- // practice however, this causes us to run out of stack space on large basic
- // blocks. To avoid this problem, compute an ordering of the nodes where each
- // node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
- // Finally, it's possible the root changed. Get the new root.
- SDValue OldRoot = DAG.getRoot();
- assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
- DAG.setRoot(LegalizedNodes[OldRoot]);
-
- LegalizedNodes.clear();
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
-
-/// FindCallEndFromCallStart - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
- if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
- return Node;
- }
- if (Node->use_empty())
- return 0; // No CallSeqEnd
-
- // The chain is usually at the end.
- SDValue TheChain(Node, Node->getNumValues()-1);
- if (TheChain.getValueType() != MVT::Other) {
- // Sometimes it's at the beginning.
- TheChain = SDValue(Node, 0);
- if (TheChain.getValueType() != MVT::Other) {
- // Otherwise, hunt for it.
- for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
- if (Node->getValueType(i) == MVT::Other) {
- TheChain = SDValue(Node, i);
- break;
- }
-
- // Otherwise, we walked into a node without a chain.
- if (TheChain.getValueType() != MVT::Other)
- return 0;
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
}
- }
-
- for (SDNode::use_iterator UI = Node->use_begin(),
- E = Node->use_end(); UI != E; ++UI) {
-
- // Make sure to only follow users of our token chain.
- SDNode *User = *UI;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
- if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
- return Result;
- }
- return 0;
-}
-
-/// FindCallStartFromCallEnd - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_START node that initiates the call sequence.
-static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
- int nested = 0;
- assert(Node && "Didn't find callseq_start for a call??");
- while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
- Node = Node->getOperand(0).getNode();
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- switch (Node->getOpcode()) {
- default:
+ if (!AnyLegalized)
break;
- case ISD::CALLSEQ_START:
- if (!nested)
- return Node;
- nested--;
- break;
- case ISD::CALLSEQ_END:
- nested++;
- break;
- }
- }
- return 0;
-}
-
-/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
-/// see if any uses can reach Dest. If no dest operands can get to dest,
-/// legalize them, legalize ourself, and return false, otherwise, return true.
-///
-/// Keep track of the nodes we find that actually do lead to Dest in
-/// NodesLeadingTo. This avoids retraversing them an exponential number of times.
-///
-bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
- if (N == Dest) return true; // N certainly leads to Dest :)
-
- // If we've already processed this node and it does lead to Dest, there is no
- // need to reprocess it.
- if (NodesLeadingTo.count(N)) return true;
-
- // If the first result of this node has been already legalized, then it cannot
- // reach N.
- if (LegalizedNodes.count(SDValue(N, 0))) return false;
-
- // Okay, this node has not already been legalized. Check and legalize all
- // operands. If none lead to Dest, then we can legalize this node.
- bool OperandsLeadToDest = false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
- NodesLeadingTo);
- if (OperandsLeadToDest) {
- NodesLeadingTo.insert(N);
- return true;
}
- // Okay, this node looks safe, legalize it and return false.
- LegalizeOp(SDValue(N, 0));
- return false;
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
}
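The new LegalizeDAG driver repeatedly sweeps the node list in reverse, legalizing anything not yet seen, and stops once a full sweep legalizes nothing; nodes created during a sweep are simply picked up by a later one. A generic fixpoint loop with the same shape, using integers in place of SDNodes (purely illustrative):

#include <set>
#include <vector>
#include <cstdio>

// Shape of the new driver: sweep all items, process the ones not yet seen
// (processing may append new items), and stop once a whole sweep changes
// nothing.
int main() {
  std::vector<int> Nodes = {1, 2, 3};
  std::set<int> Legalized;
  unsigned Sweeps = 0;
  for (;;) {
    bool AnyLegalized = false;
    for (size_t i = Nodes.size(); i-- != 0;) {      // reverse, like the patch
      int N = Nodes[i];
      if (Legalized.insert(N).second) {
        AnyLegalized = true;
        if (N < 3)                                  // "expansion" creates work
          Nodes.push_back(N + 10);
      }
    }
    ++Sweeps;
    if (!AnyLegalized)
      break;
  }
  std::printf("sweeps: %u, nodes seen: %zu\n", Sweeps, Legalized.size());
}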
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
bool Extend = false;
DebugLoc dl = CFP->getDebugLoc();
@@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
- return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false,
- Alignment);
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
}
/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
-static
-SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
@@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
@@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
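The integer fallback in ExpandUnalignedStore splits one misaligned store into two naturally aligned half-width stores at byte offsets 0 and HalfSize, with big-endian targets swapping which half lands at the base pointer. A byte-level sketch of the little-endian split, assuming a 32-bit value stored at a 2-byte-aligned address:

#include <cstdint>
#include <cstring>
#include <cstdio>

// Split a 32-bit store at a 2-byte-aligned address into two 16-bit stores --
// the little-endian half of what ExpandUnalignedStore emits.
static void storeUnaligned32(uint8_t *Buf, uint32_t Val) {
  uint16_t Lo = (uint16_t)(Val & 0xFFFF);
  uint16_t Hi = (uint16_t)(Val >> 16);
  std::memcpy(Buf, &Lo, sizeof(Lo));          // aligned half-store at offset 0
  std::memcpy(Buf + 2, &Hi, sizeof(Hi));      // aligned half-store at offset 2
}

int main() {
  uint8_t Buf[6] = {0};
  storeUnaligned32(Buf + 2, 0xAABBCCDDu);     // Buf+2 is only 2-byte aligned
  uint32_t Back;
  std::memcpy(&Back, Buf + 2, sizeof(Back));
  std::printf("round-trip: 0x%08X\n", Back);  // 0xAABBCCDD on little-endian
}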
/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
-static
-SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
- SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
}
// Copy the value to a (aligned) stack slot using (unaligned) integer
@@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
@@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
MachinePointerInfo(), LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Load;
+ ChainResult = TF;
+ return;
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- SDValue Ops[] = { Result, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = TF;
}
/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
@@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
}
@@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
/// LegalizeOp - Legalizes the given operation, ensuring that all of its
/// operands are legal.
-SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
- if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
- return Op;
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
- SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- if (I != LegalizedNodes.end()) return I->second;
-
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- SDValue Result = Op;
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
@@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
- case ISD::VAARG:
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
- case ISD::BUILD_VECTOR:
- // A weird case: legalization for BUILD_VECTOR never legalizes the
- // operands!
- // FIXME: This really sucks... changing it isn't semantically incorrect,
- // but it massively pessimizes the code for floating-point BUILD_VECTORs
- // because ConstantFP operands get legalized into constant pool loads
- // before the BUILD_VECTOR code can see them. It doesn't usually bite,
- // though, because BUILD_VECTORS usually get lowered into other nodes
- // which get legalized properly.
- SimpleFinishLegalizing = false;
- break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops, ResultVals;
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Ops.push_back(Node->getOperand(i));
switch (Node->getOpcode()) {
default: break;
- case ISD::BR:
- case ISD::BRIND:
- case ISD::BR_JT:
- case ISD::BR_CC:
- case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
- Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
- Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
- break;
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
@@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[1]));
+ if (!Ops[1].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[1] = Handle.getValue();
+ }
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[2]));
+ if (!Ops[2].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[2] = Handle.getValue();
+ }
break;
}
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
- Ops.size()), 0);
+ SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
switch (Action) {
case TargetLowering::Legal:
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- ResultVals.push_back(Result.getValue(i));
- break;
+ return;
case TargetLowering::Custom:
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(Result, DAG);
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp1.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
ResultVals.push_back(Tmp1);
else
ResultVals.push_back(Tmp1.getValue(i));
}
- break;
+ if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Result.getNode(), ResultVals);
- break;
+ ExpandNode(Node);
+ return;
case TargetLowering::Promote:
- PromoteNode(Result.getNode(), ResultVals);
- break;
- }
- if (!ResultVals.empty()) {
- for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
- if (ResultVals[i] != SDValue(Node, i))
- ResultVals[i] = LegalizeOp(ResultVals[i]);
- AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
- }
- return ResultVals[Op.getResNo()];
+ PromoteNode(Node);
+ return;
}
}
@@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
+ llvm_unreachable("Do not know how to legalize this operator!");
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL: {
- // Scalarize vector SRA/SRL/SHL.
- EVT VT = Node->getValueType(0);
- assert(VT.isVector() && "Unable to legalize non-vector shift");
- assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
- unsigned NumElem = VT.getVectorNumElements();
-
- SmallVector<SDValue, 8> Scalars;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(0), DAG.getIntPtrConstant(Idx));
- SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(1), DAG.getIntPtrConstant(Idx));
- Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
- VT.getScalarType(), Ex, Sh));
- }
- Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Scalars[0], Scalars.size());
- break;
- }
-
- case ISD::BUILD_VECTOR:
- switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Result = Tmp3;
- break;
- }
- // FALLTHROUGH
- case TargetLowering::Expand:
- Result = ExpandBUILD_VECTOR(Result.getNode());
- break;
- }
- break;
- case ISD::CALLSEQ_START: {
- SDNode *CallEnd = FindCallEndFromCallStart(Node);
-
- // Recursively Legalize all of the inputs of the call end that do not lead
- // to this call start. This ensures that any libcalls that need be inserted
- // are inserted *before* the CALLSEQ_START.
- {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
- for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
- LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
- NodesLeadingTo);
- }
-
- // Now that we have legalized all of the inputs (which may have inserted
- // libcalls), create the new CALLSEQ_START node.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
-
- // Merge in the last call to ensure that this call starts after the last
- // call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
- Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
- Tmp1 = LegalizeOp(Tmp1);
- }
-
- // Do not try to legalize the target-specific arguments (#1+).
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
- Ops.size()), Result.getResNo());
- }
-
- // Remember that the CALLSEQ_START is legalized.
- AddLegalizedOperand(Op.getValue(0), Result);
- if (Node->getNumValues() == 2) // If this has a flag result, remember it.
- AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
-
- // Now that the callseq_start and all of the non-call nodes above this call
- // sequence have been legalized, legalize the call itself. During this
- // process, no libcalls can/will be inserted, guaranteeing that no calls
- // can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
- // Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
-
- // Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
- return Result;
- }
+ case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
- }
-
- // Otherwise, the call start has been legalized and everything is going
- // according to plan. Just legalize ourselves normally here.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Do not try to legalize the target-specific arguments (#1+), except for
- // an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- } else {
- Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
- if (Tmp1 != Node->getOperand(0) ||
- Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Ops.back() = Tmp2;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- }
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
- // This finishes up call legalization.
- IsLegalizingCall = false;
-
- // If the CALLSEQ_END node has a flag, remember that we legalized it.
- AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
- if (Node->getNumValues() == 2)
- AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
- return Result.getValue(Op.getResNo());
+ break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+ Tmp1 = LD->getChain(); // Legalize the chain.
+ Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp3 = Result.getValue(0);
- Tmp4 = Result.getValue(1);
+ Tmp3 = SDValue(Node, 0);
+ Tmp4 = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp3 = Result.getOperand(0);
- Tmp4 = Result.getOperand(1);
- Tmp3 = LegalizeOp(Tmp3);
- Tmp4 = LegalizeOp(Tmp4);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp3, Tmp4);
}
}
break;
case TargetLowering::Custom:
Tmp1 = TLI.LowerOperation(Tmp3, DAG);
if (Tmp1.getNode()) {
- Tmp3 = LegalizeOp(Tmp1);
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ Tmp3 = Tmp1;
+ Tmp4 = Tmp1.getValue(1);
}
break;
case TargetLowering::Promote: {
@@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ LD->isInvariant(), LD->getAlignment());
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
+ Tmp4 = Tmp1.getValue(1);
break;
}
}
- // Since loads produce two values, make sure to remember that we
- // legalized both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp3);
- AddLegalizedOperand(SDValue(Node, 1), Tmp4);
- return Op.getResNo() ? Tmp4 : Tmp3;
+ if (Tmp4.getNode() != Node) {
+ assert(Tmp3.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
+ ReplacedNode(Node);
+ }
+ return;
}
EVT SrcVT = LD->getMemoryVT();
@@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
Ch = Result.getValue(1); // The chain.
@@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getValueType(), Result,
DAG.getValueType(SrcVT));
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp1 = Result;
+ Tmp2 = Ch;
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
@@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
} else {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
@@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
}
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp2 = Ch;
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
+ Tmp1 = SDValue(Node, 0);
+ Tmp2 = SDValue(Node, 1);
if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
+ Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ Tmp1 = Tmp3;
+ Tmp2 = Tmp3.getValue(1);
}
} else {
// If this is an unaligned load and the target doesn't support it,
@@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp1, Tmp2);
}
}
}
@@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
default: llvm_unreachable("Unexpected extend load type!");
}
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp2 = Load.getValue(1);
break;
}
- // If this is a promoted vector load, and the vector element types are
- // legal, then scalarize it.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
- TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- Node->getValueType(0).getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Node->getValueType(0), &LoadVals[0], LoadVals.size());
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
- }
-
- // If this is a promoted vector load, and the vector element types are
- // illegal, create the promoted vector from bitcasted segments.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
- EVT MemElemTy = Node->getValueType(0).getScalarType();
- EVT SrcSclrTy = SrcVT.getScalarType();
- unsigned SizeRatio =
- (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
-
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- SrcVT.getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- if (TLI.isBigEndian()) {
- // MSB (which is garbage, comes first)
- LoadVals.push_back(ScalarLoad.getValue(0));
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- } else {
- // LSB (which is data, comes first)
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- LoadVals.push_back(ScalarLoad.getValue(0));
- }
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NumElem*SizeRatio);
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- TempWideVector, &LoadVals[0], LoadVals.size());
-
- // Cast to the correct type
- ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
-
- }
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
@@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result, DAG.getValueType(SrcVT));
else
ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Tmp1 = ValRes;
+ Tmp2 = Result.getValue(1);
break;
}
}
// Since loads produce two values, make sure to remember that we legalized
// both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
+ if (Tmp2.getNode() != Node) {
+ assert(Tmp1.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+ ReplacedNode(Node);
+ }
+ break;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ Tmp1 = ST->getChain();
+ Tmp2 = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- Result = SDValue(OptStore, 0);
+ ReplaceNode(ST, OptStore);
break;
}
{
- Tmp3 = LegalizeOp(ST->getValue());
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
+ Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.getNode()) Result = Tmp1;
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Tmp1.getNode())
+ ReplaceNode(SDValue(Node, 0), Tmp1);
break;
- case TargetLowering::Promote:
+ case TargetLowering::Promote: {
assert(VT.isVector() && "Unknown legal promote case!");
Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ }
break;
}
} else {
- Tmp3 = LegalizeOp(ST->getValue());
+ Tmp3 = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
// The order of the stores doesn't matter.
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
} else {
- if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
- Tmp2 != ST->getBasePtr())
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Result = TLI.LowerOperation(Result, DAG);
+ ReplaceNode(SDValue(Node, 0),
+ TLI.LowerOperation(SDValue(Node, 0), DAG));
break;
case TargetLowering::Expand:
-
- EVT WideScalarVT = Tmp3.getValueType().getScalarType();
- EVT NarrowScalarVT = StVT.getScalarType();
-
- if (StVT.isVector()) {
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Tmp3.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
- EVT MemSclVT = StVT.getScalarType();
-
- bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
- bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
-
- // We need to expand this store. If the register element type
- // is legal then we can scalarize the vector and use
- // truncating stores.
- if (RegScalarLegal) {
- // Cast floats into integers
- unsigned ScalarSize = MemSclVT.getSizeInBits();
- EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
-
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // This scalar TruncStore may be illegal, but we lehalize it
- // later.
- SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
-
- Stores.push_back(Store);
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- // The scalar register type is illegal.
- // For example saving <2 x i64> -> <2 x i32> on a x86.
- // In here we bitcast the value into a vector of smaller parts and
- // save it using smaller scalars.
- if (!RegScalarLegal && MemScalarLegal) {
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits()/8;
-
- unsigned SizeRatio =
- (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
-
- EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
- MemSclVT,
- SizeRatio * NumElem);
-
- // Cast the wide elem vector to wider vec with smaller elem type.
- // Example <2 x i64> -> <4 x i32>
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
-
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
- // Extract the Ith element.
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
- // Bump pointer.
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // Store if, this element is:
- // - First element on big endian, or
- // - Last element on little endian
- if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
- ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
- }
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- assert(false && "Unable to legalize the vector trunc store!");
- }// is vector
-
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
// TRUNCSTORE:i16 i32 -> STORE i16
assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
}
@@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
}
- assert(Result.getValueType() == Op.getValueType() &&
- "Bad legalization!");
-
- // Make sure that the generated code is itself legal.
- if (Result != Op)
- Result = LegalizeOp(Result);
-
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (Op.getValueType().isVector())
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
@@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
- false, false, 0);
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!");
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
- false, false, DestAlign);
+ false, false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
@@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ false, false, false, 0);
}
@@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
- std::vector<Constant*> CV;
+ SmallVector<Constant*, 16> CV;
for (unsigned i = 0, e = NumElems; i != e; ++i) {
if (ConstantFPSDNode *V =
dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
@@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
}
if (!MoreThanTwoValues) {
@@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
- bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ if (isTailCall)
+ InChain = TCChain;
+
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo.first;
}
@@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
-
return CallInfo.first;
}
@@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo;
}
@@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
DebugLoc dl = Node->getDebugLoc();
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
-
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, 0);
// load the constructed double
SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported integer type!");
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
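The fudge-factor table above encodes a standard identity for converting an unsigned value when only a signed int-to-float conversion is available: convert as signed, then add 2^N back whenever the sign bit of the N-bit input was set. A rough scalar sketch of the i32 entry (the function name is illustrative, not from the patch):

#include <cstdint>

// Unsigned-to-float via a signed conversion plus the 2^32 fudge factor.
float uint32_to_float(uint32_t x) {
  float f = (float)(int32_t)x;   // signed conversion
  if ((int32_t)x < 0)
    f += 4294967296.0f;          // add 2^32 back (up to rounding)
  return f;
}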
@@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
else {
- FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
@@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unhandled Expand type in BSWAP!");
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
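The shift/or sequence above is the usual byte-swap expansion; in plain C++ the i16 and i32 forms it builds look roughly like this (helper names are illustrative):

#include <cstdint>

uint16_t bswap16(uint16_t x) {
  return (uint16_t)((x << 8) | (x >> 8));
}

uint32_t bswap32(uint32_t x) {
  return  (x << 24) |
         ((x <<  8) & 0x00FF0000u) |
         ((x >>  8) & 0x0000FF00u) |
          (x >> 24);
}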
@@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
@@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
@@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
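The comments above describe the expansions in scalar terms; a self-contained sketch of both, for a 32-bit value and with a plain loop standing in for CTPOP (names are illustrative):

#include <cstdint>

static unsigned popcount32(uint32_t x) {
  unsigned n = 0;
  for (; x; x &= x - 1)      // clear the lowest set bit each time
    ++n;
  return n;
}

// CTLZ: smear the highest set bit downward, then count the zeros above it.
unsigned expand_ctlz32(uint32_t x) {
  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
  x |= x >> 8;  x |= x >> 16;
  return popcount32(~x);                 // 32 when x == 0
}

// CTTZ: the bits strictly below the lowest set bit are exactly ~x & (x - 1).
unsigned expand_cttz32(uint32_t x) {
  return popcount32(~x & (x - 1));       // 32 when x == 0
}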
@@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
return ExpandChainLibCall(LC, Node, false);
}
-void SelectionDAGLegalize::ExpandNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
@@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, 0);
+ MachinePointerInfo(V),
+ false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
}
@@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS),
- false, false, 0);
+ false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
@@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(2), dl));
break;
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (!TLI.isTypeLegal(EltVT))
- EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+      // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept it
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+        // Calculate the new VT; its size should equal the original VT's.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
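The mask rewrite in the VECTOR_SHUFFLE hunk above can be shown on plain integers: once each original element is bitcast into `factor` legal sub-elements, an undef lane stays undef for every sub-element, and lane M becomes lanes M*factor through M*factor+factor-1. A sketch on std::vector (the function name is illustrative):

#include <vector>

std::vector<int> widenShuffleMask(const std::vector<int> &Mask, unsigned Factor) {
  std::vector<int> NewMask;
  NewMask.reserve(Mask.size() * Factor);
  for (int M : Mask)
    for (unsigned FI = 0; FI != Factor; ++FI)
      NewMask.push_back(M < 0 ? M                        // undef stays undef
                              : (int)(M * Factor + FI)); // sub-lanes of lane M
  return NewMask;
}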
@@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(0),
+ Op0,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(1),
+ Op1,
DAG.getIntPtrConstant(Idx - NumElems)));
}
+
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
break;
}
@@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
- Results.push_back(SDValue(Node, 0));
- else
- Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
break;
}
case ISD::EHSELECTION: {
@@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::EXCEPTIONADDR: {
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "Can't expand to unknown register!");
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
break;
}
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
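The FSUB case above rewrites a subtraction the target cannot do directly as an add of the negated operand; at the scalar level it is just the identity below (a sketch, not the SelectionDAG form):

double fsub_via_fadd(double a, double b) {
  return a + (-b);   // a - b expanded as FADD(a, FNEG(b))
}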
@@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
}
if (isSigned) {
@@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1);
break;
}
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
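The SRA/SRL/SHL case above pulls each lane and its shift amount out of the two vectors, shifts them as scalars, and rebuilds the result with BUILD_VECTOR. On plain arrays the same loop looks roughly like this (names are illustrative):

#include <cstddef>
#include <cstdint>

void shl_lanes(const uint32_t *Val, const uint32_t *Amt, uint32_t *Out, size_t N) {
  for (size_t i = 0; i != N; ++i)
    Out[i] = Val[i] << Amt[i];   // one scalar shift per vector element
}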
@@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- Results.push_back(SDValue(Node, i));
break;
}
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
-void SelectionDAGLegalize::PromoteNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
EVT OVT = Node->getValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
@@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
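The CTLZ adjustment above counts in the promoted width and then subtracts the zeros contributed by the zero extension. A scalar sketch for i16 promoted to i32 (helper names are illustrative):

#include <cstdint>

static unsigned ctlz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & (0x80000000u >> n)))
    ++n;
  return n;                          // 32 when x == 0
}

unsigned ctlz16(uint16_t x) {
  return ctlz32((uint32_t)x) - 16;   // drop the 16 extra leading zeros
}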
@@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Node->getOpcode() == ISD::SINT_TO_FP, dl);
Results.push_back(Tmp1);
break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
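The VAARG case above performs the read at the promoted type and converts back, which mirrors what C itself requires for small integer varargs, since they arrive promoted to int. A sketch (the function name is illustrative):

#include <cstdarg>
#include <cstdint>

int8_t first_i8_arg(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int wide = va_arg(ap, int);   // perform the larger operation
  va_end(ap);
  return (int8_t)wide;          // convert back to the original type
}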
@@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
}
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// Cast the two input vectors.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
@@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
}
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
// SelectionDAG::Legalize - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7c1cc69d6a2f..e3938968b205 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT,
- L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
case ISD::SETUEQ:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
break;
- default: assert(false && "Do not know how to soften this setcc!");
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
@@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
case MVT::i32:
Parts = TwoE32;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5c4c2ded4c5..95ddb1e0f6fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,7 +20,6 @@
#include "LegalizeTypes.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case TargetLowering::TypeWidenVector:
- if (OutVT.bitsEq(NInVT))
- // The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
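A scalar sketch of the PromoteIntRes_CTTZ trick above: for plain CTTZ, OR in the bit just past the original width so that a zero input still counts only up to that width (here i16 promoted to i32; helper names are illustrative):

#include <cstdint>

static unsigned cttz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & (1u << n)))
    ++n;
  return n;                                  // 32 when x == 0
}

unsigned cttz16(uint16_t x) {
  return cttz32((uint32_t)x | (1u << 16));   // bit 16 caps the count at 16
}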
@@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
- SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
@@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
// If we don't know anything about the high bits, exit.
if (((KnownZero|KnownOne) & HighBitMask) == 0)
@@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
-#if 0
- // FIXME: This code is broken for shifts with a zero amount!
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if ((KnownZero & HighBitMask) == HighBitMask) {
- // Compute 32-amt.
- SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
- DAG.getConstant(NVTBits, ShTy),
- Amt);
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
@@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
- Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
- Hi = DAG.getNode(ISD::OR, NVT,
- DAG.getNode(Op1, NVT, InH, Amt),
- DAG.getNode(Op2, NVT, InL, Amt2));
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
return true;
}
-#endif
return false;
}
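
The XOR trick above can be checked with a scalar model of the expansion, here for SHL of a 64-bit value split into 32-bit halves; a sketch with illustrative names, assuming the shift amount is known to be below 32:

#include <cassert>
#include <cstdint>

// Shift a 64-bit value left by amt, operating only on its 32-bit halves.
static uint64_t shl64(uint32_t lo, uint32_t hi, unsigned amt) {
  // (lo >> (32 - amt)) would be undefined for amt == 0. Instead shift by one
  // bit first and then by (31 ^ amt) == 31 - amt; both steps stay in range.
  uint32_t carried = (lo >> 1) >> (31 ^ amt);
  uint32_t outLo = lo << amt;
  uint32_t outHi = (hi << amt) | carried;
  return (uint64_t(outHi) << 32) | outLo;
}

int main() {
  uint64_t v = 0x89abcdef01234567ULL;
  for (unsigned amt = 0; amt < 32; ++amt)
    assert(shl64(uint32_t(v), uint32_t(v >> 32), amt) == (v << amt));
  return 0;
}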
@@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
-
- return false;
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
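
The same split can be modelled in scalar code, assuming 32-bit halves and an illustrative clz32 helper: the high half is used directly when it is non-zero (so the zero-undef form is safe there), otherwise the low half's count plus 32 is used.

#include <cassert>
#include <cstdint>

// Portable count-leading-zeros for a 32-bit value; clz32(0) == 32.
static unsigned clz32(uint32_t x) {
  unsigned n = 0;
  for (int b = 31; b >= 0 && !((x >> b) & 1u); --b)
    ++n;
  return n;
}

// 64-bit count-leading-zeros built from the two 32-bit halves.
static unsigned clz64(uint32_t lo, uint32_t hi) {
  return hi != 0 ? clz32(hi) : clz32(lo) + 32;
}

int main() {
  assert(clz64(0x00000001u, 0x00000000u) == 63);
  assert(clz64(0x00000000u, 0x00010000u) == 15);
  assert(clz64(0x00000000u, 0x00000000u) == 64);
  return 0;
}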
@@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
@@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
DebugLoc dl = N->getDebugLoc();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- true, Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
@@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
else if (SrcVT == MVT::i128)
FF = APInt(32, F32TwoE128);
else
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
// Check whether the sign bit is set.
SDValue Lo, Hi;
@@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType() &&
- "Invalid input vector types");
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
- unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
- unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
- assert(NumElem0 + NumElem1 == NumOutElem &&
- "Invalid number of incoming elements");
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
- for (unsigned i = 0; i < NumElem0; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op0.getValueType().getScalarType(), Op0,
- DAG.getIntPtrConstant(i));
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
- }
-
- // Take the elements from the second vector
- for (unsigned i = 0; i < NumElem1; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op1.getValueType().getScalarType(), Op1,
- DAG.getIntPtrConstant(i));
- Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a4bb577433cc..439aa4de5cf5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() {
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
@@ -275,8 +273,6 @@ ScanOperands:
EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
@@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ // Note that in some cases vector operation operands may be wider than
+ // the vector element type. For example, a BUILD_VECTOR of type <1 x i1>
+ // may have a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index abacdac686bc..e8664458e9a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -521,6 +521,7 @@ private:
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -633,6 +634,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 8e7e4985e4d0..a8ff7c65abde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -21,7 +21,6 @@
#include "LegalizeTypes.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
@@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
PtrInfo.getWithOffset(IncrementSize), false,
- false, MinAlign(Alignment, IncrementSize));
+ false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.isBigEndian())
@@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal,
+ isVolatile, isNonTemporal, isInvariant,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f815b00db5d6..3ae8345bd198 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,8 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ST->getValue().getValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
@@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
@@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
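
The stride computation above can be illustrated in isolation; the element type <4 x i17> is a hypothetical example and roundUpToPow2 is a stand-in, not the LLVM helper:

#include <cassert>

// Stand-in for rounding up to the next power of two.
static unsigned roundUpToPow2(unsigned v) {
  unsigned p = 1;
  while (p < v)
    p <<= 1;
  return p;
}

int main() {
  // Storing e.g. <4 x i17>: the 17-bit scalar is rounded up to 32 bits,
  // giving a 4-byte stride between the scalarized element stores.
  unsigned ScalarSize = 17;
  if (ScalarSize & (ScalarSize - 1))  // not already a power of two
    ScalarSize = roundUpToPow2(ScalarSize);
  assert(ScalarSize == 32);
  unsigned Stride = ScalarSize / 8;
  assert(Stride == 4);
  return 0;
}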
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
- if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ // Note that the operation may be 'promoted', which means that it is
+ // 'bitcast' to another type which is handled.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
&& "Invalid mask size");
@@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
DebugLoc DL = Op.getDebugLoc();
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107a42b2951c..5f23f01dafb4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // The vector boolean reads as all ones, but the scalar expects a single 1, so mask.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // The vector boolean reads as a single 1, but the scalar expects all ones, so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
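
The two fix-ups can be modelled on plain integers; the values below assume the common encodings (a vector 'true' as all-ones, a scalar 'true' as 1):

#include <cassert>
#include <cstdint>

int main() {
  // ZeroOrNegativeOne (vector) -> ZeroOrOne (scalar): mask down to one bit.
  int32_t vecTrue = -1;
  assert((vecTrue & 1) == 1);

  // ZeroOrOne (scalar) -> ZeroOrNegativeOne: sign-extend the low i1 bit.
  int32_t bit = 1;
  int32_t extended = -(bit & 1);   // i1 sign-extension: 1 -> -1, 0 -> 0
  assert(extended == -1);
  return 0;
}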
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
switch (N->getOpcode()) {
default:
@@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Lo part from the stack slot.
Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, MinAlign(Alignment, IncrementSize));
+ false, false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- Alignment);
+ isInvariant, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
- // Split the input.
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case TargetLowering::TypeLegal: {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypePromoteInteger: {
- SDValue InOp = GetPromotedInteger(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
- InOp.getValueType().getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypeSplitVector:
- GetSplitVector(N->getOperand(0), Lo, Hi);
- break;
- case TargetLowering::TypeWidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
}
if (N->getOpcode() == ISD::FP_ROUND) {
@@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
@@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
WidenVT, Cond1, InOp1, InOp2);
}
@@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
@@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
MVT::Other,&StChain[0],StChain.size());
}
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a real
+ // concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
@@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, isInvariant, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
+ SDValue L;
if (LdWidth < NewVTWidth) {
// Our current type we are using is too large, find a better size
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
- }
-
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
isVolatile,
- isNonTemporal, MinAlign(Align, Increment));
- LdChain.push_back(LdOp.getValue(1));
- LdOps.push_back(LdOp);
+ isNonTemporal, isInvariant,
+ MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
LdWidth -= NewVTWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..ff0136e08cd9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,657 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled predecessor for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies
+ // to instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, we might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling: a small, but very parallel,
+ // region where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic, greedy and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform-specific things.
+ // They will need to go into the back end
+ // and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
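
As a worked example of the default branch above, with hypothetical numbers: a node of height 4 that solely blocks two others, fits the current packet, and has a reg pressure delta of +1 (ScaleTwo and FactorOne as defined above):

#include <cassert>

int main() {
  const unsigned ScaleTwo = 10, FactorOne = 2;
  signed ResCount = 1;        // initial trivial priority
  ResCount += 4 * ScaleTwo;   // critical path: height 4
  ResCount += 2 * ScaleTwo;   // solely blocks two other nodes
  ResCount <<= FactorOne;     // resources available this cycle
  ResCount -= 1 * ScaleTwo;   // reg pressure delta of +1
  assert(ResCount == 234);
  return 0;
}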
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // The heuristic is simple - a node with no data successors reduces
+ // the number of live ranges. All others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ E = Queue.end(); I != E; ++I) {
+ if (*I == *Best)
+ continue;
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b275c6321ae4..24da432a47a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
-
+
void push(SUnit *U) {
Queue.push_back(U);
}
@@ -101,8 +101,8 @@ private:
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
- /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool ForceUnitLatencies() const { return true; }
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
- SUnit *NewSU = NewSUnit(N);
+ SUnit *NewSU = newSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
- LoadSU = NewSUnit(LoadNode);
+ LoadSU = newSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
}
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
if (isNewLoad) {
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
}
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
Added = true;
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
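The getPhysicalRegisterVT hunk above only changes the pointer type of the implicit-def walk (unsigned to uint16_t); the loop itself computes the result index of a physical-register def by counting entries of a zero-terminated table. A minimal standalone sketch of that counting, with hypothetical register numbers (not LLVM API, just the idea):

    #include <cstdint>
    #include <cstdio>

    // Toy implicit-def table: zero-terminated, uint16_t elements as in r154661.
    // The register numbers here are hypothetical.
    static const uint16_t ImplicitDefs[] = {5, 7, 9, 0};

    // Mirrors the loop in getPhysicalRegisterVT: the result index of a physreg
    // def is the number of explicit defs plus its position in the implicit list.
    static unsigned resultIndexForReg(unsigned NumExplicitDefs, unsigned Reg) {
      unsigned NumRes = NumExplicitDefs;
      for (const uint16_t *ImpDef = ImplicitDefs; *ImpDef; ++ImpDef) {
        if (Reg == *ImpDef)
          break;
        ++NumRes;
      }
      return NumRes;
    }

    int main() {
      // With 2 explicit defs, implicit def 7 (second table entry) maps to index 3.
      std::printf("reg 7 -> result index %u\n", resultIndexForReg(2, 7));
      return 0;
    }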
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e757defd3895..2cb5d37d689e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -45,10 +45,6 @@ static RegisterScheduler
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
static RegisterScheduler
- tdrListrDAGScheduler("list-tdrr",
- "Top-down register reduction list scheduling",
- createTDRRListDAGScheduler);
-static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
"order when possible",
@@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
static cl::opt<bool> DisableSchedHeight(
"disable-sched-height", cl::Hidden, cl::init(false),
cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
static cl::opt<int> MaxReorderWindow(
"max-sched-reorder", cl::Hidden, cl::init(6),
@@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -121,10 +109,6 @@ namespace {
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
- /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
- /// it is top-down.
- bool isBottomUp;
-
/// NeedLatency - True if the scheduler will make use of latency information.
///
bool NeedLatency;
@@ -162,11 +146,15 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ : ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits) {
@@ -221,8 +209,6 @@ private:
void ReleasePred(SUnit *SU, const SDep *PredEdge);
void ReleasePredecessors(SUnit *SU);
- void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
- void ReleaseSuccessors(SUnit *SU);
void ReleasePending();
void AdvanceToCycle(unsigned NextCycle);
void AdvancePastStalls(SUnit *SU);
@@ -242,15 +228,11 @@ private:
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
- void ScheduleNodeTopDown(SUnit*);
- void ListScheduleTopDown();
-
-
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
- SUnit *NewNode = NewSUnit(N);
+ SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
@@ -268,9 +250,9 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool ForceUnitLatencies() const {
+ bool forceUnitLatencies() const {
return !NeedLatency;
}
};
@@ -278,7 +260,7 @@ private:
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
-/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
@@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
unsigned Opcode = Node->getMachineOpcode();
@@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegGens.resize(TRI->getNumRegs(), NULL);
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() {
HazardRec->Reset();
- // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
- if (isBottomUp)
- ListScheduleBottomUp();
- else
- ListScheduleTopDown();
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
- if (!ForceUnitLatencies()) {
+ if (!forceUnitLatencies()) {
// Updating predecessor's height. This is now the cycle when the
// predecessor can be scheduled without causing a pipeline stall.
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
}
}
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
also defines the register. This effectively creates one large live range
@@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
}
}
}
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
}
/// Check to see if any of the pending instructions are ready to issue. If
@@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- unsigned ReadyCycle =
- isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
if (ReadyCycle < MinAvailableCycle)
MinAvailableCycle = ReadyCycle;
@@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
}
else {
for (; CurCycle != NextCycle; ++CurCycle) {
- if (isBottomUp)
- HazardRec->RecedeCycle();
- else
- HazardRec->AdvanceCycle();
+ HazardRec->RecedeCycle();
}
}
// FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
@@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// currently need to treat these nodes like real instructions.
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
- unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = SU->getHeight();
// Bump CurCycle to account for latency. We assume the latency of other
// available instructions may be hidden by the stall (not a full pipe stall).
@@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// Calls are scheduled in their preceding cycle, so don't conflict with
// hazards from instructions after the call. EmitNode will reset the
// scoreboard state before emitting the call.
- if (isBottomUp && SU->isCall)
+ if (SU->isCall)
return;
// FIXME: For resource conflicts in very long non-pipelined stages, we
@@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
int Stalls = 0;
while (true) {
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+ HazardRec->getHazardType(SU, -Stalls);
if (HT == ScheduleHazardRecognizer::NoHazard)
break;
@@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
HazardRec->Reset();
return;
}
- if (isBottomUp && SU->isCall) {
+ if (SU->isCall) {
// Calls are scheduled with their preceding instructions. For bottom-up
// scheduling, clear the pipeline state before emitting.
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
-
- if (!isBottomUp && SU->isCall) {
- HazardRec->Reset();
- }
}
static void resetVRegCycle(SUnit *SU);
@@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
Sequence.push_back(SU);
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LiveRegGens[I->getReg()] = NULL;
}
}
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
resetVRegCycle(SU);
@@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
}
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
+
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
else {
AvailableQueue->push(SU);
}
- AvailableQueue->UnscheduledNode(SU);
+ AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
@@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
+ // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
+ // which can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return NULL;
+
DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
- ComputeLatency(LoadSU);
+ computeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
@@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
- ComputeLatency(NewSU);
+ computeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
@@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
}
}
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (!Node->isMachineOpcode())
continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(isBottomUp);
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Down Scheduling
-//===----------------------------------------------------------------------===//
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
- SUnit *SuccSU = SuccEdge->getSUnit();
-
-#ifndef NDEBUG
- if (SuccSU->NumPredsLeft == 0) {
- dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
- dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
- }
-#endif
- --SuccSU->NumPredsLeft;
-
- // If all the node's predecessors are scheduled, this node is ready
- // to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
- SuccSU->isAvailable = true;
- AvailableQueue->push(SuccSU);
- }
-}
-
-void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
- // Top down: release successors
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
- "The list-tdrr scheduler doesn't yet support physreg dependencies!");
-
- ReleaseSucc(SU, &*I);
- }
-}
-
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
-/// count of its successors. If a successor pending count is zero, add it to
-/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
-
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
- Sequence.push_back(SU);
-
- ReleaseSuccessors(SU);
- SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
-}
-
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
-/// schedulers.
-void ScheduleDAGRRList::ListScheduleTopDown() {
- AvailableQueue->setCurCycle(CurCycle);
-
- // Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
-
- // All leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- // It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
- }
- }
-
- // While Available queue is not empty, grab the node with the highest
- // priority. If it is not ready put it back. Schedule the node.
- Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
- SUnit *CurSU = AvailableQueue->pop();
-
- if (CurSU)
- ScheduleNodeTopDown(CurSU);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
- }
-
-#ifndef NDEBUG
- VerifySchedule(isBottomUp);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
-
//===----------------------------------------------------------------------===//
// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
@@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort {
bool operator()(SUnit* left, SUnit* right) const;
};
-// td_ls_rr_sort - Priority function for top down register pressure reduction
-// scheduler.
-struct td_ls_rr_sort : public queue_sort {
- enum {
- IsBottomUp = false,
- HasReadyFilter = false
- };
-
- RegReductionPQBase *SPQ;
- td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
-};
-
// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
enum {
@@ -1510,6 +1587,7 @@ protected:
std::vector<SUnit*> Queue;
unsigned CurQueueId;
bool TracksRegPressure;
+ bool SrcOrder;
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
@@ -1535,11 +1613,12 @@ public:
RegReductionPQBase(MachineFunction &mf,
bool hasReadyFilter,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
: SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
@@ -1610,9 +1689,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void ScheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU);
- void UnscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU);
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
public:
RegReductionPriorityQueue(MachineFunction &mf,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
Picker(this) {}
bool isBottomUp() const { return SF::IsBottomUp; }
@@ -1680,10 +1761,7 @@ public:
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
- if (isBottomUp())
- dbgs() << "Height " << SU->getHeight() << ": ";
- else
- dbgs() << "Depth " << SU->getDepth() << ": ";
+ dbgs() << "Height " << SU->getHeight() << ": ";
SU->dump(DAG);
}
}
@@ -1692,9 +1770,6 @@ public:
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;
-typedef RegReductionPriorityQueue<td_ls_rr_sort>
-TDRegReductionPriorityQueue;
-
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
@@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
return PDiff;
}
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
dumpRegPressure();
}
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LHeight = (int)left->getHeight() + LPenalty;
int RHeight = (int)right->getHeight() + RPenalty;
- bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
BUHasStall(left, LHeight, SPQ);
- bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
BUHasStall(right, RHeight, SPQ);
// If scheduling one of the node will cause a pipeline stall, delay it.
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
- if (!checkPref || (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency)) {
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
// One or both of the nodes are calls and their sethi-ullman numbers are the
// same, then keep source order.
@@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
@@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
+ if (!TracksRegPressure && !SrcOrder)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
@@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const unsigned *ImpDefs
+ const uint16_t *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
- if(!ImpDefs)
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if(!ImpDefs && !RegMask)
return false;
for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
@@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!PI->isAssignedRegDep())
continue;
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries a
- // topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
- return true;
- }
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
}
}
return false;
@@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const unsigned *SUImpDefs =
+ const uint16_t *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
- if (!SUImpDefs)
- return false;
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
@@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
@@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
}
}
-/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
-/// predecessors of the successors of the SUnit SU. Stop when the provided
-/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
- unsigned Limit) {
- unsigned Sum = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- const SUnit *SuccSU = I->getSUnit();
- for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
- EE = SuccSU->Preds.end(); II != EE; ++II) {
- SUnit *PredSU = II->getSUnit();
- if (!PredSU->isScheduled)
- if (++Sum > Limit)
- return Sum;
- }
- }
- return Sum;
-}
-
-
-// Top down
-bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
- if (int res = checkSpecialNodes(left, right))
- return res < 0;
-
- unsigned LPriority = SPQ->getNodePriority(left);
- unsigned RPriority = SPQ->getNodePriority(right);
- bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
- bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
- bool LIsFloater = LIsTarget && left->NumPreds == 0;
- bool RIsFloater = RIsTarget && right->NumPreds == 0;
- unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
- unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
-
- if (left->NumSuccs == 0 && right->NumSuccs != 0)
- return false;
- else if (left->NumSuccs != 0 && right->NumSuccs == 0)
- return true;
-
- if (LIsFloater)
- LBonus -= 2;
- if (RIsFloater)
- RBonus -= 2;
- if (left->NumSuccs == 1)
- LBonus += 2;
- if (right->NumSuccs == 1)
- RBonus += 2;
-
- if (LPriority+LBonus != RPriority+RBonus)
- return LPriority+LBonus < RPriority+RBonus;
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
-
- if (left->NumSuccsLeft != right->NumSuccsLeft)
- return left->NumSuccsLeft > right->NumSuccsLeft;
-
- assert(left->NodeQueueId && right->NodeQueueId &&
- "NodeQueueId cannot be zero");
- return (left->NodeQueueId > right->NodeQueueId);
-}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
- new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
- PQ->setScheduleDAG(SD);
- return SD;
-}
-
-llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- TDRegReductionPriorityQueue *PQ =
- new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
- new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
HybridBURRPriorityQueue *PQ =
- new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
@@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
ILPBURRPriorityQueue *PQ =
- new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
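The IsChainDependent and FindCallSeqStart helpers added above are essentially bracket matching along the chain: a lowered CALLSEQ_END raises a nesting counter, a CALLSEQ_BEGIN lowers it, and the search stops when the counter returns to zero so that nested call sequences are skipped. A minimal standalone sketch of that idea (plain C++, no SelectionDAG types; the Marker enum and chain layout are hypothetical):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Toy chain of call-frame markers, walked from a lowered CALLSEQ_END back
    // toward its matching CALLSEQ_BEGIN (index 0 is the END being scheduled).
    enum Marker { CallSeqEnd, CallSeqBegin, Other };

    // Returns the index of the CALLSEQ_BEGIN matching the CALLSEQ_END at index 0,
    // or -1 if none is found. Nested call sequences are skipped by counting.
    static int findCallSeqStart(const std::vector<Marker> &Chain) {
      unsigned NestLevel = 0;
      for (unsigned i = 0, e = Chain.size(); i != e; ++i) {
        if (Chain[i] == CallSeqEnd)
          ++NestLevel;                 // entering a (possibly nested) sequence
        else if (Chain[i] == CallSeqBegin) {
          assert(NestLevel != 0 && "unmatched CALLSEQ_BEGIN");
          if (--NestLevel == 0)        // back to the outermost sequence
            return (int)i;
        }
      }
      return -1;
    }

    int main() {
      // END ... (nested END/BEGIN pair) ... BEGIN
      std::vector<Marker> Chain = {CallSeqEnd, Other, CallSeqEnd, Other,
                                   CallSeqBegin, Other, CallSeqBegin};
      std::printf("matching CALLSEQ_BEGIN at index %d\n", findCallSeqStart(Chain));
      return 0;
    }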
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6fa47a..69dd813b24e0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf),
+ : ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
DAG = dag;
- ScheduleDAG::Run(bb, insertPos);
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
}
/// NewSUnit - Creates a new SUnit and returns a ptr to it.
///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
- SUnit *SU = NewSUnit(Old->getNode());
+ SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
- SUnit *NodeSUnit = NewSUnit(NI);
+ SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node, if so, add them to glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ computeLatency(NodeSUnit);
}
// Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
+ bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
}
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies()) {
+ if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const{
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies())
+ if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
namespace {
struct OrderSorter {
bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
- EmitNoop();
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
- EmitPhysRegCopy(SU, CopyVRBaseMap);
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
- MachineBasicBlock *InsertBB = Emitter.getBlock();
- MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
- if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
- if (DbgMI)
- InsertBB->insert(Pos, DbgMI);
- }
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
++DI;
}
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
- BB = Emitter.getBlock();
InsertPos = Emitter.getInsertPos();
- return BB;
+ return Emitter.getBlock();
+}
+
+/// Return a name for the DAG based on the parent basic block's label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
}
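The new VerifyScheduledSequence above cross-checks the emitted Sequence against the number of scheduled units by treating null slots as noops. A hedged standalone sketch of that bookkeeping (ToyUnit stands in for SUnit; this is not the LLVM implementation):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct ToyUnit { int Id; };   // stand-in for an SUnit

    // Mirrors the check in VerifyScheduledSequence: every non-null slot in the
    // sequence should correspond to exactly one scheduled unit; null slots are
    // noops and are excluded from the count.
    static void verifySequence(const std::vector<ToyUnit*> &Sequence,
                               std::size_t ScheduledNodes) {
      std::size_t Noops = 0;
      for (std::size_t i = 0, e = Sequence.size(); i != e; ++i)
        if (!Sequence[i])
          ++Noops;
      assert(Sequence.size() - Noops == ScheduledNodes &&
             "The number of nodes scheduled doesn't match the expected number!");
    }

    int main() {
      ToyUnit A{0}, B{1};
      std::vector<ToyUnit*> Sequence = {&A, nullptr, &B};  // one noop slot
      verifySequence(Sequence, 2);                          // passes
      return 0;
    }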
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 9c27b2ea02ec..75940ec33ddc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
+ MachineBasicBlock *BB;
SelectionDAG *DAG; // DAG of the current basic block
const InstrItineraryData *InstrItins;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
explicit ScheduleDAGSDNodes(MachineFunction &mf);
virtual ~ScheduleDAGSDNodes() {}
/// Run - perform scheduling.
///
- void Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos);
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
/// isPassiveNode - Return true if the node is a non-scheduled leaf.
///
@@ -53,6 +56,7 @@ namespace llvm {
if (isa<ConstantSDNode>(Node)) return true;
if (isa<ConstantFPSDNode>(Node)) return true;
if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
if (isa<GlobalAddressSDNode>(Node)) return true;
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
@@ -67,7 +71,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and returns a ptr to it.
///
- SUnit *NewSUnit(SDNode *N);
+ SUnit *newSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -78,7 +82,7 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AliasAnalysis *AA);
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
/// CopyToReg and its only active data operands are CopyFromReg within a
@@ -90,30 +94,41 @@ namespace llvm {
///
void InitNumRegDefsLeft(SUnit *SU);
- /// ComputeLatency - Compute node latency.
+ /// computeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU);
+ virtual void computeLatency(SUnit *SU);
- /// ComputeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const { }
- virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
- virtual MachineBasicBlock *EmitSchedule();
-
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
virtual void dumpNode(const SUnit *SU) const;
+ void dumpSchedule() const;
+
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ virtual std::string getDAGName() const;
+
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
@@ -159,6 +174,9 @@ namespace llvm {
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
};
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 430283d5eff9..c8512914c1e2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -31,6 +31,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include <climits>
using namespace llvm;
@@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
- tdListDAGScheduler("list-td", "Top-down list scheduler",
- createTDListDAGScheduler);
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
namespace {
//===----------------------------------------------------------------------===//
-/// ScheduleDAGList - The actual list scheduler implementation. This supports
-/// top-down scheduling.
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
///
-class ScheduleDAGList : public ScheduleDAGSDNodes {
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
@@ -61,16 +62,20 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
public:
- ScheduleDAGList(MachineFunction &mf,
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetMachine &tm = mf.getTarget();
HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
}
- ~ScheduleDAGList() {
+ ~ScheduleDAGVLIW() {
delete HazardRec;
delete AvailableQueue;
}
@@ -78,23 +83,25 @@ public:
void Schedule();
private:
- void ReleaseSucc(SUnit *SU, const SDep &D);
- void ReleaseSuccessors(SUnit *SU);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
- void ListScheduleTopDown();
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
};
} // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
-void ScheduleDAGList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
- BuildSchedGraph(NULL);
+ BuildSchedGraph(AA);
AvailableQueue->initNodes(SUnits);
- ListScheduleTopDown();
+ listScheduleTopDown();
AvailableQueue->releaseState();
}
@@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() {
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
SUnit *SuccSU = D.getSUnit();
#ifndef NDEBUG
@@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
PendingQueue.push_back(SuccSU);
+ }
}
-void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
// Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
assert(!I->isAssignedRegDep() &&
"The list-td scheduler doesn't yet support physreg dependencies!");
- ReleaseSucc(SU, *I);
+ releaseSucc(SU, *I);
}
}
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
- ReleaseSuccessors(SU);
+ releaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
}
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// listScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
-void ScheduleDAGList::ListScheduleTopDown() {
+void ScheduleDAGVLIW::listScheduleTopDown() {
unsigned CurCycle = 0;
// Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
+ releaseSuccessors(&EntrySU);
- // All leaves to Available queue.
+ // All leaves to AvailableQueue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
if (SUnits[i].Preds.empty()) {
@@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
}
- // While Available queue is not empty, grab the node with the highest
+ // While AvailableQueue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
Sequence.reserve(SUnits.size());
@@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else {
+ }
+ else {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
@@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
++CurCycle;
continue;
}
@@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If we found a node to schedule, do it now.
if (FoundSUnit) {
- ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
// If this is a pseudo-op node, we don't want to increment the current
@@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
+ VerifyScheduledSequence(/*isBottomUp=*/false);
#endif
}
@@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler.
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
-llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
}
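The renamed releaseSucc/releaseSuccessors/scheduleNodeTopDown/listScheduleTopDown routines above implement classic top-down list scheduling: each SUnit tracks how many predecessors remain unscheduled, and when that count reaches zero the node becomes available for issue. The sketch below models that loop in isolation, assuming unit latencies, no hazard recognizer, and a plain FIFO in place of the ResourcePriorityQueue; Node and topDownListSchedule are illustrative names, not LLVM types.

#include <cstdio>
#include <queue>
#include <vector>

struct Node {
  int Id = 0;
  std::vector<int> Succs;   // edges to dependent nodes
  int NumPredsLeft = 0;     // predecessors not yet scheduled
};

std::vector<int> topDownListSchedule(std::vector<Node> &DAG) {
  std::queue<int> Available;                           // stands in for the priority queue
  for (Node &N : DAG)
    if (N.NumPredsLeft == 0) Available.push(N.Id);     // leaves are ready immediately

  std::vector<int> Sequence;
  while (!Available.empty()) {
    int Cur = Available.front(); Available.pop();
    Sequence.push_back(Cur);                           // "schedule" the node
    for (int S : DAG[Cur].Succs)                       // releaseSuccessors: one pred done
      if (--DAG[S].NumPredsLeft == 0) Available.push(S);
  }
  return Sequence;
}

int main() {
  // 0 -> 1 -> 3 and 0 -> 2 -> 3: a small diamond-shaped dependence graph.
  std::vector<Node> DAG(4);
  for (int i = 0; i < 4; ++i) DAG[i].Id = i;
  auto addEdge = [&](int From, int To) { DAG[From].Succs.push_back(To); ++DAG[To].NumPredsLeft; };
  addEdge(0, 1); addEdge(0, 2); addEdge(1, 3); addEdge(2, 3);
  for (int Id : topDownListSchedule(DAG)) std::printf("%d ", Id);  // prints: 0 1 2 3
  std::printf("\n");
}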
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 20bea8e4c9e9..92671d1678c6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
+ case MVT::f16: return &APFloat::IEEEhalf;
case MVT::f32: return &APFloat::IEEEsingle;
case MVT::f64: return &APFloat::IEEEdouble;
case MVT::f80: return &APFloat::x87DoubleExtended;
@@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
- // elements.
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
if (isa<ConstantSDNode>(NotZero)) {
- if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+ EltSize)
return false;
} else if (isa<ConstantFPSDNode>(NotZero)) {
- if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
- bitcastToAPInt().isAllOnesValue())
+ if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+ .bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
- // undefs.
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero &&
N->getOperand(i).getOpcode() != ISD::UNDEF)
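The hunk above changes the all-ones test to compare countTrailingOnes() of the (possibly promoted) constant against the vector element width, so an i8 element that legalization stored as the i32 constant 0x000000FF still counts as all-ones even though the full-width value does not. A self-contained illustration on plain integers follows; countTrailingOnes and isAllOnesElement are stand-in helpers, not APInt methods.

#include <cstdint>
#include <cstdio>

// Count trailing one bits of a 32-bit value (stand-in for APInt::countTrailingOnes).
static unsigned countTrailingOnes(uint32_t V) {
  unsigned N = 0;
  while (V & 1) { ++N; V >>= 1; }
  return N;
}

// An element is "all ones" if at least EltSize low bits are set, regardless of
// how wide the promoted constant happens to be.
static bool isAllOnesElement(uint32_t PromotedConst, unsigned EltSize) {
  return countTrailingOnes(PromotedConst) >= EltSize;
}

int main() {
  // An i8 element with value ~0 that type legalization promoted to i32.
  uint32_t Promoted = 0x000000FF;
  std::printf("full-width all-ones? %d\n", Promoted == 0xFFFFFFFFu);        // 0: the old check fails
  std::printf("element all-ones?    %d\n", isAllOnesElement(Promoted, 8));  // 1: the new check succeeds
}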
@@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
-
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
///
static inline unsigned
encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal) {
+ bool isNonTemporal, bool isInvariant) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
@@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
return ConvType |
(AM << 2) |
(isVolatile << 5) |
- (isNonTemporal << 6);
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
}
//===----------------------------------------------------------------------===//
@@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
RemoveDeadNodes(DeadNodes, UpdateListener);
}
@@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
- } else {
- assert(0 && "Unsupported type in getConstantFP");
- return SDValue();
- }
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
@@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
@@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
/// processing.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) const {
- unsigned BitWidth = Mask.getBitWidth();
- assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
- "Mask size mismatches value type size!");
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
- if (Depth == 6 || Mask == 0)
+ if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
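With the Mask parameter dropped, ComputeMaskedBits always computes the full known-zero/known-one sets and callers mask the result themselves (as MaskedValueIsZero now does above). The sketch below models the propagation rules this function uses for constants, AND and OR on a single 64-bit lane; KnownBits64 and the helper names are illustrative, not LLVM's APInt API.

#include <cstdint>
#include <cstdio>

// Known-bits pair over a 64-bit lane: each bit is known 0, known 1, or unknown.
struct KnownBits64 { uint64_t Zero = 0, One = 0; };

// Constant: every bit is known.
static KnownBits64 knownConst(uint64_t C) { return {~C, C}; }

// AND: a result bit is 0 if either input bit is known 0; 1 only if both are known 1.
static KnownBits64 knownAnd(KnownBits64 A, KnownBits64 B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

// OR: a result bit is 1 if either input bit is known 1; 0 only if both are known 0.
static KnownBits64 knownOr(KnownBits64 A, KnownBits64 B) {
  return {A.Zero & B.Zero, A.One | B.One};
}

int main() {
  KnownBits64 X;                      // a completely unknown value
  KnownBits64 M = knownConst(0xFF);   // a known constant
  KnownBits64 R = knownAnd(X, M);     // X & 0xFF: the high 56 bits are known zero
  std::printf("known zero: %#llx\n", (unsigned long long)R.Zero); // 0xffffffffffffff00
  KnownBits64 S = knownOr(X, M);      // X | 0xFF: the low 8 bits are known one
  std::printf("known one:  %#llx\n", (unsigned long long)S.One);  // 0xff
  // The invariant asserted throughout the real code: no bit is both known 0 and known 1.
  std::printf("consistent: %d\n", (R.Zero & R.One) == 0 && (S.Zero & S.One) == 0);
}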
@@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return;
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero |= KnownZero2;
return;
case ISD::OR:
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownOne |= KnownOne2;
return;
case ISD::XOR: {
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::MUL: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
LeadZ = std::min(LeadZ, BitWidth);
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
return;
}
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(Op.getOperand(1),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case ISD::SELECT:
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero &= KnownZero2;
return;
case ISD::SELECT_CC:
- ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
@@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
KnownZero |= HighBits; // High bits known zero.
}
return;
@@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- APInt InDemandedMask = (Mask << ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
- if (HighBits.getBoolValue())
- InDemandedMask |= APInt::getSignBit(BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignBit = APInt::getSignBit(EBits);
- APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
@@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
if (ISD::isZEXTLoad(Op.getNode())) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
}
return;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
-
- // If any of the sign extended bits are demanded, we know that the sign
- // bit is demanded. Temporarily set this bit in the mask for our callee.
- if (NewBits.getBoolValue())
- InMask |= InSignBit;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
@@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
assert(!(SignBitKnownZero && SignBitKnownOne) &&
"Sign bit can't be known to be both zero and one!");
- // If the sign bit wasn't actually demanded by our caller, we don't
- // want it set in the KnownZero and KnownOne result values. Reset the
- // mask and reapply it to the result values.
- InMask = Mask.trunc(InBits);
- KnownZero &= InMask;
- KnownOne &= InMask;
-
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.trunc(InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
return;
@@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.zext(InBits);
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
@@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
- KnownOne, Depth+1);
- KnownZero |= (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
return;
}
case ISD::FGETSIGN:
@@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
}
}
}
@@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
@@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// the upper bits are all one.
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
-
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
}
}
@@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
break;
}
@@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
- Depth+1);
- ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
return;
}
case ISD::FrameIndex:
@@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
return;
}
}
@@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
@@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
@@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
} else if (KnownOne.isNegative()) { // sign bit is 1;
@@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
- case ISD::FP_ROUND:
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND) {
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else
- return Operand.getNode()->getOperand(0);
+ return Operand.getNode()->getOperand(0);
}
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
@@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
default: break;
}
}
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
}
// Canonicalize an UNDEF to the RHS, even over a constant.
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SELECT:
if (N1C) {
if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- else
- return N3; // select false, X, Y -> Y
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
- break;
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+ const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumBits = VT.getSizeInBits();
- unsigned MSB = NumBits / 8;
+ unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
uint64_t Val = 0;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
+
return DAG.getConstant(Val, VT);
}
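The rewritten getMemsetStringVal packs min(value-width, string-length) bytes directly into the integer, placing byte i at bit position i*8 on little-endian targets and at (NumVTBytes-i-1)*8 on big-endian ones, instead of walking the string backwards. The following self-contained sketch reproduces just that packing; packStringBytes is an illustrative helper, not an LLVM function.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Pack up to NumVTBytes leading bytes of Str into an integer, mirroring the
// little-/big-endian loops in getMemsetStringVal.
static uint64_t packStringBytes(const char *Str, unsigned NumVTBytes, bool LittleEndian) {
  unsigned Len = (unsigned)std::strlen(Str);
  unsigned NumBytes = Len < NumVTBytes ? Len : NumVTBytes;
  uint64_t Val = 0;
  for (unsigned i = 0; i != NumBytes; ++i) {
    unsigned Shift = LittleEndian ? i * 8 : (NumVTBytes - i - 1) * 8;
    Val |= (uint64_t)(unsigned char)Str[i] << Shift;
  }
  return Val;
}

int main() {
  // "abcd" = bytes 0x61 0x62 0x63 0x64 packed into a 32-bit store value.
  std::printf("LE: %#llx\n", (unsigned long long)packStringBytes("abcd", 4, true));   // 0x64636261
  std::printf("BE: %#llx\n", (unsigned long long)packStringBytes("abcd", 4, false));  // 0x61626364
}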
@@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
-static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
GlobalAddressSDNode *G = NULL;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
- return true;
-
- return false;
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
@@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, MemcpyStrSrc,
+ IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- std::string Str;
+ StringRef Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
@@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff), isVol,
@@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcPtrInfo.getWithOffset(SrcOff), isVol,
- false, SrcAlign);
+ false, false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool NonScalarIntSafe =
+ bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, false, DAG, TLI))
+ IsZeroVal, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
@@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
@@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
TBAAInfo);
}
@@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
@@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple lines.
+/// This will make the debugger work better at -O0, where there is a higher
+/// probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
}
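Each ReplaceAllUsesWith overload above now checks whether the node being replaced is the DAG root and, if so, repoints the root at the replacement; otherwise the root handle would keep referring to the dead node. A tiny self-contained model of that hazard follows; TinyDAG and its members are made-up names used only to show why the setRoot call is needed.

#include <cstdio>
#include <vector>

// Minimal model: a "DAG" that tracks a root index into its node list.
struct TinyDAG {
  std::vector<int> Nodes;   // node payloads; the index is the node's identity
  int RootIdx = -1;

  void setRoot(int Idx) { RootIdx = Idx; }
  int  getRoot() const   { return RootIdx; }

  // Replace all uses of From with To. If the root itself was From, retarget it,
  // mirroring the "If we just RAUW'd the root, take note" hunks above.
  void replaceAllUsesWith(int From, int To) {
    // (use-list rewriting elided in this sketch)
    if (From == RootIdx)
      setRoot(To);
  }
};

int main() {
  TinyDAG DAG;
  DAG.Nodes = {10, 20};
  DAG.setRoot(0);
  DAG.replaceAllUsesWith(/*From=*/0, /*To=*/1);
  std::printf("root payload: %d\n", DAG.Nodes[DAG.getRoot()]);  // 20, not the stale node
}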
namespace {
@@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
@@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
- switch (getOpcode()) {
- default:
- if (getOpcode() < ISD::BUILTIN_OP_END)
- return "<<Unknown DAG Node>>";
- if (isMachineOpcode()) {
- if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
- if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
- }
- if (G) {
- const TargetLowering &TLI = G->getTargetLoweringInfo();
- const char *Name = TLI.getTargetNodeName(getOpcode());
- if (Name) return Name;
- return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
- }
- return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
- case ISD::DELETED_NODE:
- return "<<Deleted Node!>>";
-#endif
- case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
- case ISD::ATOMIC_FENCE: return "AtomicFence";
- case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
- case ISD::ATOMIC_SWAP: return "AtomicSwap";
- case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
- case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
- case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
- case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
- case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
- case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
- case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
- case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
- case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
- case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
- case ISD::ATOMIC_LOAD: return "AtomicLoad";
- case ISD::ATOMIC_STORE: return "AtomicStore";
- case ISD::PCMARKER: return "PCMarker";
- case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
- case ISD::SRCVALUE: return "SrcValue";
- case ISD::MDNODE_SDNODE: return "MDNode";
- case ISD::EntryToken: return "EntryToken";
- case ISD::TokenFactor: return "TokenFactor";
- case ISD::AssertSext: return "AssertSext";
- case ISD::AssertZext: return "AssertZext";
-
- case ISD::BasicBlock: return "BasicBlock";
- case ISD::VALUETYPE: return "ValueType";
- case ISD::Register: return "Register";
-
- case ISD::Constant: return "Constant";
- case ISD::ConstantFP: return "ConstantFP";
- case ISD::GlobalAddress: return "GlobalAddress";
- case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
- case ISD::FrameIndex: return "FrameIndex";
- case ISD::JumpTable: return "JumpTable";
- case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
- case ISD::RETURNADDR: return "RETURNADDR";
- case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
- case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
- case ISD::LSDAADDR: return "LSDAADDR";
- case ISD::EHSELECTION: return "EHSELECTION";
- case ISD::EH_RETURN: return "EH_RETURN";
- case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
- case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
- case ISD::ConstantPool: return "ConstantPool";
- case ISD::ExternalSymbol: return "ExternalSymbol";
- case ISD::BlockAddress: return "BlockAddress";
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
- unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
- return TII->getName(IID);
- llvm_unreachable("Invalid intrinsic ID");
- }
-
- case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
- case ISD::TargetConstantFP:return "TargetConstantFP";
- case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
- case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
- case ISD::TargetFrameIndex: return "TargetFrameIndex";
- case ISD::TargetJumpTable: return "TargetJumpTable";
- case ISD::TargetConstantPool: return "TargetConstantPool";
- case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
- case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
- case ISD::CopyToReg: return "CopyToReg";
- case ISD::CopyFromReg: return "CopyFromReg";
- case ISD::UNDEF: return "undef";
- case ISD::MERGE_VALUES: return "merge_values";
- case ISD::INLINEASM: return "inlineasm";
- case ISD::EH_LABEL: return "eh_label";
- case ISD::HANDLENODE: return "handlenode";
-
- // Unary operators
- case ISD::FABS: return "fabs";
- case ISD::FNEG: return "fneg";
- case ISD::FSQRT: return "fsqrt";
- case ISD::FSIN: return "fsin";
- case ISD::FCOS: return "fcos";
- case ISD::FTRUNC: return "ftrunc";
- case ISD::FFLOOR: return "ffloor";
- case ISD::FCEIL: return "fceil";
- case ISD::FRINT: return "frint";
- case ISD::FNEARBYINT: return "fnearbyint";
- case ISD::FEXP: return "fexp";
- case ISD::FEXP2: return "fexp2";
- case ISD::FLOG: return "flog";
- case ISD::FLOG2: return "flog2";
- case ISD::FLOG10: return "flog10";
-
- // Binary operators
- case ISD::ADD: return "add";
- case ISD::SUB: return "sub";
- case ISD::MUL: return "mul";
- case ISD::MULHU: return "mulhu";
- case ISD::MULHS: return "mulhs";
- case ISD::SDIV: return "sdiv";
- case ISD::UDIV: return "udiv";
- case ISD::SREM: return "srem";
- case ISD::UREM: return "urem";
- case ISD::SMUL_LOHI: return "smul_lohi";
- case ISD::UMUL_LOHI: return "umul_lohi";
- case ISD::SDIVREM: return "sdivrem";
- case ISD::UDIVREM: return "udivrem";
- case ISD::AND: return "and";
- case ISD::OR: return "or";
- case ISD::XOR: return "xor";
- case ISD::SHL: return "shl";
- case ISD::SRA: return "sra";
- case ISD::SRL: return "srl";
- case ISD::ROTL: return "rotl";
- case ISD::ROTR: return "rotr";
- case ISD::FADD: return "fadd";
- case ISD::FSUB: return "fsub";
- case ISD::FMUL: return "fmul";
- case ISD::FDIV: return "fdiv";
- case ISD::FMA: return "fma";
- case ISD::FREM: return "frem";
- case ISD::FCOPYSIGN: return "fcopysign";
- case ISD::FGETSIGN: return "fgetsign";
- case ISD::FPOW: return "fpow";
-
- case ISD::FPOWI: return "fpowi";
- case ISD::SETCC: return "setcc";
- case ISD::SELECT: return "select";
- case ISD::VSELECT: return "vselect";
- case ISD::SELECT_CC: return "select_cc";
- case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
- case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
- case ISD::CONCAT_VECTORS: return "concat_vectors";
- case ISD::INSERT_SUBVECTOR: return "insert_subvector";
- case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
- case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
- case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
- case ISD::CARRY_FALSE: return "carry_false";
- case ISD::ADDC: return "addc";
- case ISD::ADDE: return "adde";
- case ISD::SADDO: return "saddo";
- case ISD::UADDO: return "uaddo";
- case ISD::SSUBO: return "ssubo";
- case ISD::USUBO: return "usubo";
- case ISD::SMULO: return "smulo";
- case ISD::UMULO: return "umulo";
- case ISD::SUBC: return "subc";
- case ISD::SUBE: return "sube";
- case ISD::SHL_PARTS: return "shl_parts";
- case ISD::SRA_PARTS: return "sra_parts";
- case ISD::SRL_PARTS: return "srl_parts";
-
- // Conversion operators.
- case ISD::SIGN_EXTEND: return "sign_extend";
- case ISD::ZERO_EXTEND: return "zero_extend";
- case ISD::ANY_EXTEND: return "any_extend";
- case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
- case ISD::TRUNCATE: return "truncate";
- case ISD::FP_ROUND: return "fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
- case ISD::FP_EXTEND: return "fp_extend";
-
- case ISD::SINT_TO_FP: return "sint_to_fp";
- case ISD::UINT_TO_FP: return "uint_to_fp";
- case ISD::FP_TO_SINT: return "fp_to_sint";
- case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bitcast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
- // Control flow instructions
- case ISD::BR: return "br";
- case ISD::BRIND: return "brind";
- case ISD::BR_JT: return "br_jt";
- case ISD::BRCOND: return "brcond";
- case ISD::BR_CC: return "br_cc";
- case ISD::CALLSEQ_START: return "callseq_start";
- case ISD::CALLSEQ_END: return "callseq_end";
-
- // Other operators
- case ISD::LOAD: return "load";
- case ISD::STORE: return "store";
- case ISD::VAARG: return "vaarg";
- case ISD::VACOPY: return "vacopy";
- case ISD::VAEND: return "vaend";
- case ISD::VASTART: return "vastart";
- case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
- case ISD::EXTRACT_ELEMENT: return "extract_element";
- case ISD::BUILD_PAIR: return "build_pair";
- case ISD::STACKSAVE: return "stacksave";
- case ISD::STACKRESTORE: return "stackrestore";
- case ISD::TRAP: return "trap";
-
- // Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
-
- // Trampolines
- case ISD::INIT_TRAMPOLINE: return "init_trampoline";
- case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
- case ISD::CONDCODE:
- switch (cast<CondCodeSDNode>(this)->get()) {
- default: llvm_unreachable("Unknown setcc condition!");
- case ISD::SETOEQ: return "setoeq";
- case ISD::SETOGT: return "setogt";
- case ISD::SETOGE: return "setoge";
- case ISD::SETOLT: return "setolt";
- case ISD::SETOLE: return "setole";
- case ISD::SETONE: return "setone";
-
- case ISD::SETO: return "seto";
- case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
- case ISD::SETUGT: return "setugt";
- case ISD::SETUGE: return "setuge";
- case ISD::SETULT: return "setult";
- case ISD::SETULE: return "setule";
- case ISD::SETUNE: return "setune";
-
- case ISD::SETEQ: return "seteq";
- case ISD::SETGT: return "setgt";
- case ISD::SETGE: return "setge";
- case ISD::SETLT: return "setlt";
- case ISD::SETLE: return "setle";
- case ISD::SETNE: return "setne";
- }
- }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
- switch (AM) {
- default:
- return "";
- case ISD::PRE_INC:
- return "<pre-inc>";
- case ISD::PRE_DEC:
- return "<pre-dec>";
- case ISD::POST_INC:
- return "<post-inc>";
- case ISD::POST_DEC:
- return "<post-dec>";
- }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
- std::string S = "< ";
-
- if (isZExt())
- S += "zext ";
- if (isSExt())
- S += "sext ";
- if (isInReg())
- S += "inreg ";
- if (isSRet())
- S += "sret ";
- if (isByVal())
- S += "byval ";
- if (isNest())
- S += "nest ";
- if (getByValAlign())
- S += "byval-align:" + utostr(getByValAlign()) + " ";
- if (getOrigAlign())
- S += "orig-align:" + utostr(getOrigAlign()) + " ";
- if (getByValSize())
- S += "byval-size:" + utostr(getByValSize()) + " ";
- return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
- print(dbgs(), G);
- dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
-
- for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
- if (i) OS << ",";
- if (getValueType(i) == MVT::Other)
- OS << "ch";
- else
- OS << getValueType(i).getEVTString();
- }
- OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
- if (!MN->memoperands_empty()) {
- OS << "<";
- OS << "Mem:";
- for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
- e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
- if (llvm::next(i) != e)
- OS << " ";
- }
- OS << ">";
- }
- } else if (const ShuffleVectorSDNode *SVN =
- dyn_cast<ShuffleVectorSDNode>(this)) {
- OS << "<";
- for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (i) OS << ",";
- if (Idx < 0)
- OS << "u";
- else
- OS << Idx;
- }
- OS << ">";
- } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
- OS << '<' << CSDN->getAPIntValue() << '>';
- } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
- OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
- OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
- else {
- OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
- OS << ")>";
- }
- } else if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(this)) {
- int64_t offset = GADN->getOffset();
- OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
- OS << '>';
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = GADN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
- OS << "<" << FIDN->getIndex() << ">";
- } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
- OS << "<" << JTDN->getIndex() << ">";
- if (unsigned int TF = JTDN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
- int offset = CP->getOffset();
- if (CP->isMachineConstantPoolEntry())
- OS << "<" << *CP->getMachineCPVal() << ">";
- else
- OS << "<" << *CP->getConstVal() << ">";
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = CP->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
- OS << "<";
- const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
- if (LBB)
- OS << LBB->getName() << " ";
- OS << (const void*)BBDN->getBasicBlock() << ">";
- } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
- } else if (const ExternalSymbolSDNode *ES =
- dyn_cast<ExternalSymbolSDNode>(this)) {
- OS << "'" << ES->getSymbol() << "'";
- if (unsigned int TF = ES->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
- if (M->getValue())
- OS << "<" << M->getValue() << ">";
- else
- OS << "<null>";
- } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
- if (MD->getMD())
- OS << "<" << MD->getMD() << ">";
- else
- OS << "<null>";
- } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
- }
- else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
-
- bool doExt = true;
- switch (LD->getExtensionType()) {
- default: doExt = false; break;
- case ISD::EXTLOAD: OS << ", anyext"; break;
- case ISD::SEXTLOAD: OS << ", sext"; break;
- case ISD::ZEXTLOAD: OS << ", zext"; break;
- }
- if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(LD->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
-
- if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(ST->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
- } else if (const BlockAddressSDNode *BA =
- dyn_cast<BlockAddressSDNode>(this)) {
- OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
- OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
- OS << ">";
- if (unsigned int TF = BA->getTargetFlags())
- OS << " [TF=" << TF << ']';
- }
-
- if (G)
- if (unsigned Order = G->GetOrdering(this))
- OS << " [ORD=" << Order << ']';
-
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
-
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
- }
- print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
- const SelectionDAG *G, unsigned depth,
- unsigned indent) {
- if (depth == 0)
- return;
-
- OS.indent(indent);
-
- N->print(OS, G);
-
- if (depth < 1)
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- // Don't follow chain operands.
- if (N->getOperand(i).getValueType() == MVT::Other)
- continue;
- OS << '\n';
- printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
- }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
- unsigned depth) const {
- printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
- printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i).getNode()->hasOneUse())
- DumpNodes(N->getOperand(i).getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
- dbgs() << "\n";
- dbgs().indent(indent);
- N->dump(G);
-}
-
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
@@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
-void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
- DumpNodes(N, 2, this);
- }
-
- if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
- dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
- const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
- return;
-
- // Dump the current SDNode, but don't end the line yet.
- OS << std::string(indent, ' ');
- N->printr(OS, G);
-
- // Having printed this SDNode, walk the children:
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
- if (i) OS << ",";
- OS << " ";
-
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
- }
-
- OS << "\n";
-
- // Dump children that have grandchildren on their own line(s).
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
- DumpNodesr(OS, child, indent+2, G, once);
- }
-}
-
-void SDNode::dumpr() const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 095b4001696f..94cb95804f69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -41,13 +41,13 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getIntPtrConstant(1));
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
- return SDValue();
}
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
@@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getMergeValues(&Constants[0], Constants.size(),
getCurDebugLoc());
}
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+      // Add each leaf value from the operand to the Ops list
+      // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
- Ops.push_back(getValue(CP->getOperand(i)));
+ Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
llvm_unreachable("Can't get register for value!");
- return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
@@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
@@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
// Update successor info
- InvokeMBB->addSuccessor(Return);
- InvokeMBB->addSuccessor(LandingPad);
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
-}
-
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
@@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
AddLandingPadInfo(LP, MMI, MBB);
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
@@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- Constant *C = Pivot->Low;
+ const Constant *C = Pivot->Low;
MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
// We know that we branch to the LHS if the Value being switched on is
@@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- BasicBlock *SuccBB = SI.getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getSuccessorValue(i),
- SI.getSuccessorValue(i),
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumCases() == 1) {
+ if (!SI.getNumCases()) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
- DestVT, N, DAG.getIntPtrConstant(0)));
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I){
- // FPTrunc is never a no-op cast, no need to check
+ // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
TLI.getValueType(I.getType()), InVec, InIdx));
}
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
- unsigned MaskNumElts = Mask.size();
- for (unsigned i = 0; i != MaskNumElts; ++i)
- if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning at position Pos and ending at Pos+Size, falls within the
+// specified sequential range [Low, Low+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
return false;
return true;
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
- SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- // Convert the ConstantVector mask operand into an array of ints, with -1
- // representing undef values.
- SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
- unsigned MaskNumElts = MaskElts.size();
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (isa<UndefValue>(MaskElts[i]))
- Mask.push_back(-1);
- else
- Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
- }
-
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
- VT, Src1, Src2));
- return;
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
}
// Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx);
- else
- MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1),
- static_cast<int>(SrcNumElts+1)};
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- int Input = 0;
+ unsigned Input = 0;
if (Idx < 0)
continue;
@@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Check if the access is smaller than the vector size and can we find
// a reasonable extract index.
- int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
- // Extract.
+ int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
+ // Extract.
int StartIdx[2]; // StartIdx to extract from
- for (int Input=0; Input < 2; ++Input) {
- if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
RangeUse[Input] = 0; // Unused
StartIdx[Input] = 0;
- } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
- // Fits within range but we should see if we can find a good
- // start index that is a multiple of the mask length.
- if (MaxRange[Input] < (int)MaskNumElts) {
- RangeUse[Input] = 1; // Extract from beginning of the vector
- StartIdx[Input] = 0;
- } else {
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
- }
+ continue;
}
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
if (RangeUse[0] == 0 && RangeUse[1] == 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
// Extract appropriate subvector and generate a vector shuffle
- for (int Input=0; Input < 2; ++Input) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
@@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < 0)
- MappedOps.push_back(Idx);
- else if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx - StartIdx[0]);
- else
- MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (Mask[i] < 0) {
- Ops.push_back(DAG.getUNDEF(EltVT));
- } else {
- int Idx = Mask[i];
- SDValue Res;
+ int Idx = Mask[i];
+ SDValue Res;
- if (Idx < (int)SrcNumElts)
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src1, DAG.getConstant(Idx, PtrVT));
- else
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src2,
- DAG.getConstant(Idx - SrcNumElts, PtrVT));
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Ops.push_back(Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
}
+
+ Ops.push_back(Res);
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
@@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- Type *Ty = I.getOperand(0)->getType();
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
Values[i] = L;
Chains[ChainI] = L.getValue(1);
@@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
DebugLoc dl = getCurDebugLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
- default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
@@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
- assert(TLI.isTypeLegal(Op.getValueType()) &&
- "Intrinsic uses a non-legal type?");
Ops.push_back(Op);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
- for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
- assert(TLI.isTypeLegal(ValueVTs[Val]) &&
- "Intrinsic uses a non-legal type?");
- }
-#endif // NDEBUG
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
setValue(&I, Result);
+ } else {
+ // Assign order to result here. If the intrinsic does not produce a result,
+ // it won't be mapped to a SDNode and visit() will not assign it an order
+ // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
}
}
@@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
-
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
- return 0;
-}
-
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
@@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
const SDValue &CFR = Ext.getOperand(0);
if (CFR.getOpcode() == ISD::CopyFromReg)
return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- else
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
}
return 0;
}
@@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI->getFrameRegister(MF);
+ Reg = TRI->getFrameRegister(MF);
if (!Reg && N.getNode()) {
if (N.getOpcode() == ISD::CopyFromReg)
@@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
- return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(Variable).Verify())
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
+ }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
@@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
}
@@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
if (isParameter && !AI) {
@@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
return 0;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
@@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
}
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return 0;
@@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
V = BCI->getOperand(0);
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
// Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return 0;
+ }
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI == FuncInfo.StaticAllocaMap.end())
@@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
return 0;
}
- case Intrinsic::eh_exception: {
- // Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[1];
- Ops[0] = DAG.getRoot();
- SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- return 0;
- }
-
- case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CallMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CallMBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getArgOperand(0));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- DAG.setRoot(Op.getValue(1));
- setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
- return 0;
- }
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, MVT::i32));
+ setValue(&I, Res);
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: {
ISD::CvtCode Code = ISD::CVT_INVALID;
switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::convertff: Code = ISD::CVT_FF; break;
case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
@@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
@@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
@@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
DAG.setRoot(Result.second);
return 0;
}
case Intrinsic::uadd_with_overflow:
- return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
- return implVisitAluOverflow(I, ISD::SADDO);
case Intrinsic::usub_with_overflow:
- return implVisitAluOverflow(I, ISD::USUBO);
case Intrinsic::ssub_with_overflow:
- return implVisitAluOverflow(I, ISD::SSUBO);
case Intrinsic::umul_with_overflow:
- return implVisitAluOverflow(I, ISD::UMULO);
- case Intrinsic::smul_with_overflow:
- return implVisitAluOverflow(I, ISD::SMULO);
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+ }
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
isTailCall,
+ CS.doesNotReturn(),
!CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
@@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, 1);
+ false, false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
- false /*nontemporal*/, 1 /* align=1 */);
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- // See if any floating point values are being passed to this function. This is
- // used to emit an undefined reference to fltused on Windows.
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (FT->isVarArg() &&
- !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
- if (!i->isFloatingPointTy()) continue;
- MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
- break;
- }
- }
- }
+ ComputeUsesVAFloatArgument(I, &MMI);
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
@@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
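
The ten branches above all follow one shape: the call lowers to a unary FP DAG node only when TargetLibraryInfo reports the library function as available and the call has a single floating-point argument whose type matches the result. A minimal sketch of that pattern as a table lookup; LibcallEntry, UnaryFPCalls and getUnaryFPOpcode are illustrative names, not the structure used in the tree:

    struct LibcallEntry { LibFunc::Func F; const char *Name; ISD::NodeType Op; };

    static const LibcallEntry UnaryFPCalls[] = {
      { LibFunc::floor, "floor", ISD::FFLOOR },
      { LibFunc::ceil,  "ceil",  ISD::FCEIL  },
      { LibFunc::rint,  "rint",  ISD::FRINT  },
      // float and long double variants omitted for brevity.
    };

    // Returns the matching opcode, or ISD::DELETED_NODE as a "no match" sentinel.
    static ISD::NodeType getUnaryFPOpcode(const TargetLibraryInfo &TLI,
                                          StringRef Name) {
      for (unsigned i = 0, e = array_lengthof(UnaryFPCalls); i != e; ++i)
        if (TLI.has(UnaryFPCalls[i].F) && Name == UnaryFPCalls[i].Name)
          return UnaryFPCalls[i].Op;
      return ISD::DELETED_NODE;
    }

The caller would still perform the same shape checks as the hunks above before emitting DAG.getNode(Op, getCurDebugLoc(), Tmp.getValueType(), Tmp).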
@@ -5596,22 +5690,6 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
- /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
- /// busy in OutputRegs/InputRegs.
- void MarkAllocatedRegs(bool isOutReg, bool isInReg,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs,
- const TargetRegisterInfo &TRI) const {
- if (isOutReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
- }
- if (isInReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
- }
- }
-
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -5659,18 +5737,6 @@ public:
return TLI.getValueType(OpTy, true);
}
-
-private:
- /// MarkRegAndAliases - Mark the specified register and all aliases in the
- /// specified set.
- static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
- Regs.insert(Reg);
- if (const unsigned *Aliases = TRI.getAliasSet(Reg))
- for (; *Aliases; ++Aliases)
- Regs.insert(*Aliases);
- }
};
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
@@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-/// Input and OutputRegs are the set of already allocated physical registers.
///
static void GetRegistersForValue(SelectionDAG &DAG,
const TargetLowering &TLI,
DebugLoc DL,
- SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
+ SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
- // Compute whether this value requires an input register, an output register,
- // or both.
- bool isOutReg = false;
- bool isInReg = false;
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- isOutReg = true;
-
- // If there is an input constraint that matches this, we need to reserve
- // the input register so no other inputs allocate to it.
- isInReg = OpInfo.hasMatchingInput();
- break;
- case InlineAsm::isInput:
- isInReg = true;
- isOutReg = false;
- break;
- case InlineAsm::isClobber:
- isOutReg = true;
- isInReg = true;
- break;
- }
-
-
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
}
@@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- std::set<unsigned> OutputRegs, InputRegs;
-
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI.ParseConstraints(CS);
@@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal)) {
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
TLI.getPointerTy());
} else {
@@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate output reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
- if (Ops.empty())
- report_fatal_error("Invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
@@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
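
The three error-path hunks above share one design change: instead of aborting the compiler with report_fatal_error, the failure is reported against the offending call site through LLVMContext::emitError, and lowering of the current asm operand stops with a break. The essential shape, condensed from the hunks above:

    if (OpInfo.AssignedRegs.Regs.empty()) {
      LLVMContext &Ctx = *DAG.getContext();
      // Attach the diagnostic to the instruction so clients can map it back to
      // source, then give up on this operand instead of the whole compilation.
      Ctx.emitError(CS.getInstruction(),
                    "couldn't allocate input reg for constraint '" +
                    Twine(OpInfo.ConstraintCode) + "'");
      break;
    }

The intent is that a client which installs a diagnostic handler on the LLVMContext can surface the error itself rather than having the process aborted.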
@@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed,
+ bool doesNotRet, bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
@@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
- return SDValue();
}
void
@@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3472ca..8393b414926a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,11 +67,11 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
class UnreachableInst;
-class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -129,13 +129,13 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- Constant* Low;
- Constant* High;
+ const Constant *Low;
+ const Constant *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -294,6 +294,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +339,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
@@ -451,7 +453,8 @@ private:
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
uint32_t Weight = 0);
public:
@@ -471,7 +474,6 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
@@ -554,8 +556,6 @@ private:
void visitUserOp2(const Instruction &I) {
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
-
- const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..f981afb437b0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
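
The new file only relocates the dump and print machinery; the entry points keep their existing signatures. A sketch of typical debugging use (output appears only in builds with assertions enabled and, for DEBUG(), only under -debug):

    #define DEBUG_TYPE "isel"
    #include "llvm/Support/Debug.h"

    // Inside code that holds a SelectionDAG *CurDAG and an SDValue V:
    DEBUG(dbgs() << "Before combine:\n"; CurDAG->dump());
    DEBUG(V.getNode()->dumprWithDepth(CurDAG, 3)); // V plus three operand levels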
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68b9146adfe1..605509bd227a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -142,14 +217,15 @@ namespace llvm {
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None ||
+ TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == Sched::Latency)
- return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
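
The dispatch above now keys purely off the target's declared scheduling preference: the -O0 path and targets that ask for Sched::Source both get the source-order scheduler, the old top-down latency list scheduler branch is removed, and Sched::VLIW routes to a new VLIW scheduler. A target selects a preference from its TargetLowering constructor; an illustrative sketch with made-up target names:

    FooTargetLowering::FooTargetLowering(FooTargetMachine &TM)
        : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
      // Picked up by the scheduler dispatch above.
      setSchedulingPreference(Sched::VLIW);
    }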
@@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
- return 0;
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
+void SelectionDAGISel::ISelUpdater::anchor() { }
+
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
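
Threading TargetLibraryInfo into instruction selection uses the standard analysis plumbing: require it in getAnalysisUsage, fetch it in runOnMachineFunction, then hand the pointer to the builder so visitCall can ask LibInfo->has(...) as in the earlier hunks. The three touch points, condensed:

    void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();  // pass manager schedules the immutable pass
      // ...
    }

    bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
      LibInfo = &getAnalysis<TargetLibraryInfo>();
      SDB->init(GFI, *AA, LibInfo);         // builder keeps the pointer for visitCall
      // ...
    }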
@@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
Worklist.push_back(CurDAG->getRoot().getNode());
- APInt Mask;
APInt KnownZero;
APInt KnownOne;
@@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
- CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
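
This hunk, like the CheckOrMask change later in this file, tracks an API update: SelectionDAG::ComputeMaskedBits no longer takes a mask of the bits the caller cares about. Old and new call forms, with the variable names used above:

    // Old form: the caller supplied a mask of interesting bits.
    APInt Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
    CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);

    // New form: known-zero and known-one bits are computed for the full width.
    CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);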
@@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getFunction()->getNameStr() + ":" +
- FuncInfo->MBB->getBasicBlock()->getNameStr();
+ BlockName = MF->getFunction()->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize types", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Scheduling", GroupName,
TimePassesIsEnabled);
- Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
- FuncInfo->InsertPt = Scheduler->InsertPos;
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
@@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
if (Reg) MBB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::const_iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
- }
}
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
@@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+// Collect per-instruction statistics for fast-isel misses. Only those
+// instructions that cause the bail are accounted for. It does not account for
+// instructions higher in the block. Thus, summing the per-instruction stats
+// will not add up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->setLastLocalValue(0);
}
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
continue;
+ }
// Bottom-up: reset the insert pos at the top, after any local-value
// instructions.
@@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
@@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
- ++NumFastIselFailures;
+
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
Inst->dump();
@@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
SelectBasicBlock(Inst, BI, HadTailCall);
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
--BI;
break;
}
+ NumFastIselRemaining = RemainingNow;
continue;
}
if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
// Don't abort, and use a different message for terminator misses.
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed terminator: ";
Inst->dump();
}
} else {
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
Inst->dump();
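
The hunks above switch the fast-isel statistics from per-instruction increments to bulk accounting: NumFastIselRemaining starts at std::distance(Begin, End) and whatever is still remaining when fast-isel gives up is charged to NumFastIselFailures in one step. A minimal standalone sketch of that bookkeeping (toy C++, not LLVM code):

#include <cstdio>
#include <vector>

enum Outcome { Selected, Skipped, GaveUp };

int main() {
  // Outcomes for six instructions in one block, walked bottom-up.
  std::vector<Outcome> Block = {Selected, Skipped, Selected, GaveUp,
                                GaveUp, GaveUp};
  unsigned NumFastIselRemaining = static_cast<unsigned>(Block.size());
  unsigned NumSuccess = 0, NumFailures = 0;

  for (Outcome O : Block) {
    if (O == Selected) { --NumFastIselRemaining; ++NumSuccess; continue; }
    if (O == Skipped)  { --NumFastIselRemaining; continue; } // dead or folded
    // GaveUp: everything still remaining is handed to SelectionDAG, so the
    // whole remainder is charged to the failure counter in one go.
    NumFailures += NumFastIselRemaining;
    break;
  }
  std::printf("selected %u, charged %u failures\n", NumSuccess, NumFailures);
  return 0;
}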
@@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
APInt NeededMask = DesiredMask & ~ActualMask;
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
@@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
+ case ISD::RegisterMask:
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cd1647b17b9b..6cde05aea82a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace llvm {
@@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 907d8d9da1af..09a2b1f3d7a5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,31 +36,9 @@ using namespace llvm;
/// - the promotion of vector elements. This feature is disabled by default
/// and only enabled using this flag.
static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
cl::desc("Allow promotion of integer vector element types"));
-namespace llvm {
-TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
- // FIXME: what should we do for protected and internal visibility?
- // For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
-
- if (reloc == Reloc::PIC_) {
- if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
- else
- return TLSModel::GeneralDynamic;
- } else {
- if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
- else
- return TLSModel::InitialExec;
- }
-}
-}
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
@@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::Latency;
+ SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
JumpBufAlignment = 0;
MinFunctionAlignment = 0;
@@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const {
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
- if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+
return Table;
}
@@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
- KnownZero = ~KnownOne & NewMask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
@@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- EVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarType().getSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
@@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
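
The SIGN_EXTEND_INREG change above adds a shortcut: when only the most significant result bit is demanded, the MSB of sign_extend_inreg(x, ExVT) equals ExVT's sign bit, so a single left shift by BitWidth - ShAmt reproduces it. A standalone check of that equivalence (plain C++, not LLVM code), assuming a 32-bit value and an i8 inner type:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// sign_extend_inreg(x, i8) on a 32-bit value, written out in plain C++.
static uint32_t sext_inreg_i8(uint32_t x) {
  return static_cast<uint32_t>(
      static_cast<int32_t>(static_cast<int8_t>(x & 0xFF)));
}

int main() {
  const unsigned BitWidth = 32, ShAmt = 8;                       // ExVT is i8
  for (uint32_t x : {0x0000007Fu, 0x00000080u, 0xDEADBEEFu}) {
    uint32_t viaSext = sext_inreg_i8(x)          & 0x80000000u;  // demanded MSB only
    uint32_t viaShl  = (x << (BitWidth - ShAmt)) & 0x80000000u;  // the new SHL form
    assert(viaSext == viaShl);
  }
  return 0;
}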
@@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
} else { // Otherwise, top bits aren't known.
- KnownOne &= ~NewBits;
- KnownZero &= ~NewBits;
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
}
break;
}
@@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getOperand(0).getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
@@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// FALL THROUGH
default:
// Just use ComputeMaskedBits to compute output bits.
- TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
break;
}
@@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
@@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
(KnownOne.countPopulation() == 1);
}
@@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
- false, false, NewAlign);
+ false, false, false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (N0.getValueType().isInteger()) {
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ case ZeroOrNegativeOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
+ }
+ }
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
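
The new switch makes the X == X fold respect the target's boolean representation: targets whose compares produce all-ones masks (ZeroOrNegativeOneBooleanContent, typical for vector compares) now get -1 rather than 1. A minimal sketch of the constant selection, not tied to any real target:

#include <cstdint>
#include <iostream>

enum BooleanContent { UndefinedBoolean, ZeroOrOne, ZeroOrNegativeOne };

static int64_t trueValueFor(BooleanContent BC) {
  switch (BC) {
  case UndefinedBoolean:
  case ZeroOrOne:         return 1;    // scalar-style booleans
  case ZeroOrNegativeOne: return -1;   // all-ones masks, common for vector compares
  }
  return 0; // not reached
}

int main() {
  std::cout << trueValueFor(ZeroOrOne) << ' '
            << trueValueFor(ZeroOrNegativeOne) << '\n';   // prints: 1 -1
  return 0;
}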
@@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
}
}
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// Simplify (X+Z) == X --> Z == 0
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
}
}
}
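
The guard added here keeps the (X+Z) == X style rewrites from firing when the compare's RHS constant already fits a legal compare immediate and the add has other users, since the rewrite would then only raise register pressure (an induction-variable chain is the motivating case in the comment). A standalone sketch of the predicate; the immediate window in isLegalICmpImmediate below is hypothetical:

#include <cstdint>
#include <iostream>

// Stand-in for TargetLowering::isLegalICmpImmediate; the window below is
// hypothetical, real targets supply their own rule.
static bool isLegalICmpImmediate(int64_t Imm) {
  return Imm >= -4096 && Imm < 4096;
}

// Mirrors the guard in the hunk: only rewrite when the RHS constant is not
// already a cheap compare immediate, or when the add has no other users.
static bool shouldRewriteAddCompare(int64_t RHSImm, bool AddHasOneUse) {
  bool LegalRHSImm = isLegalICmpImmediate(RHSImm);
  return !LegalRHSImm || AddHasOneUse;
}

int main() {
  std::cout << std::boolalpha
            << shouldRewriteAddCompare(42, false) << ' '        // false: keep cmp-with-imm
            << shouldRewriteAddCompare(1 << 20, false) << '\n'; // true: imm is not cheap anyway
  return 0;
}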
@@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_Memory:
return 3;
}
+ llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
@@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
- if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
N->getOperand(0),
DAG.getConstant(magics.m, VT)).getNode(), 1);
@@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
@@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else
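
BuildSDIV and BuildUDIV now take an IsAfterLegalization flag so that after legalization only genuinely legal MULHS/MULHU or *MUL_LOHI nodes are used. The transform itself is the classic multiply-by-magic-number division; a standalone sketch of the unsigned case (not LLVM code), narrowed to 16-bit numerators so plain 64-bit arithmetic is exact. The real code derives a per-width magic constant and may need an extra add or shift fixup:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// ceil(2^32 / d); gives exact floor division by d for every 16-bit numerator.
static uint64_t reciprocalFor(uint16_t d) {
  assert(d >= 2 && "d == 0 and d == 1 are handled before this point");
  return (0x100000000ULL + d - 1) / d;
}

static uint16_t divideByConstant(uint16_t n, uint64_t m) {
  return static_cast<uint16_t>((n * m) >> 32);  // the "high part" of the multiply
}

int main() {
  for (unsigned d : {3u, 7u, 10u, 641u}) {
    uint64_t m = reciprocalFor(static_cast<uint16_t>(d));
    for (uint32_t n = 0; n <= 0xFFFF; ++n)
      assert(divideByConstant(static_cast<uint16_t>(n), m) == n / d);
  }
  return 0;
}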
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 2609256c8ffa..0016047a134e 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -116,8 +116,7 @@ namespace {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) &&
- !isa<ResumeInst>(TI))
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
Builder.SetInsertPoint(TI->getParent(), TI);
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 160f38f69236..21ae2f5e56eb 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
}
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
}
bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
}
// Initialize shrink wrapping DFA sets, called before iterations.
@@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() {
// via --shrink-wrap-func=<funcname>.
#ifndef NDEBUG
if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getFunction()->getNameStr();
+ std::string MFName = MF->getFunction()->getName().str();
ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
}
#endif
@@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
return "";
if (MBB->getBasicBlock())
- return MBB->getBasicBlock()->getNameStr();
+ return MBB->getBasicBlock()->getName().str();
std::ostringstream name;
name << "_MBB_" << MBB->getNumber();
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index ded2459d4278..9a86f32d8f96 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,21 +29,20 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
-static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden,
- cl::desc("Disable the old SjLj EH preparation pass"));
-
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class SjLjEHPass : public FunctionPass {
+ class SjLjEHPrepare : public FunctionPass {
const TargetLowering *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
@@ -54,16 +53,12 @@ namespace {
Constant *StackRestoreFn;
Constant *LSDAAddrFn;
Value *PersonalityFn;
- Constant *SelectorFn;
- Constant *ExceptionFn;
Constant *CallSiteFn;
- Constant *DispatchSetupFn;
Constant *FuncCtxFn;
- Value *CallSite;
- DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap;
+ AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -75,28 +70,24 @@ namespace {
private:
bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
void lowerIncomingArguments(Function &F);
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
-
- void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
- void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
- SwitchInst *CatchSwitch);
- void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
- void splitLandingPad(InvokeInst *II);
- bool insertSjLjEHSupport(Function &F);
+ void insertCallSiteStore(Instruction *I, int Number);
};
} // end anonymous namespace
-char SjLjEHPass::ID = 0;
+char SjLjEHPrepare::ID = 0;
-// Public Interface To the SjLjEHPass pass.
-FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
- return new SjLjEHPass(TLI);
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+ return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
// exceptions.
-bool SjLjEHPass::doInitialization(Module &M) {
+bool SjLjEHPrepare::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
- SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
- ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
- DispatchSetupFn
- = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
PersonalityFn = 0;
@@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) {
/// insertCallSiteStore - Insert a store of the call-site value to the
/// function context
-void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
- Value *CallSite) {
- ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
- Number);
- // Insert a store of the call-site number
- new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
-}
-
-/// splitLandingPad - Split a landing pad. This takes considerable care because
-/// of PHIs and other nasties. The problem is that the jump table needs to jump
-/// to the landing pad block. However, the landing pad block can be jumped to
-/// only by an invoke instruction. So we clone the landingpad instruction into
-/// its own basic block, have the invoke jump to there. The landingpad
-/// instruction's basic block's successor is now the target for the jump table.
-///
-/// But because of PHI nodes, we need to create another basic block for the jump
-/// table to jump to. This is definitely a hack, because the values for the PHI
-/// nodes may not be defined on the edge from the jump table. But that's okay,
-/// because the jump table is simply a construct to mimic what is happening in
-/// the CFG. So the values are mysteriously there, even though there is no value
-/// for the PHI from the jump table's edge (hence calling this a hack).
-void SjLjEHPass::splitLandingPad(InvokeInst *II) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(),
- ".1", ".2", this, NewBBs);
-
- // Create an empty block so that the jump table has something to jump to
- // which doesn't have any PHI nodes.
- BasicBlock *LPad = NewBBs[0];
- BasicBlock *Succ = *succ_begin(LPad);
- BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land",
- LPad->getParent(), Succ);
- LPad->getTerminator()->eraseFromParent();
- BranchInst::Create(JumpTo, LPad);
- BranchInst::Create(Succ, JumpTo);
- LPadSuccMap[II] = JumpTo;
-
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *Val = PN->removeIncomingValue(LPad, false);
- PN->addIncoming(Val, JumpTo);
- }
-}
-
-/// markInvokeCallSite - Insert code to mark the call_site for this invoke
-void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
- Value *CallSite,
- SwitchInst *CatchSwitch) {
- ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
- // The runtime comes back to the dispatcher with the call_site - 1 in
- // the context. Odd, but there it is.
- ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo - 1);
-
- // If the unwind edge has phi nodes, split the edge.
- if (isa<PHINode>(II->getUnwindDest()->begin())) {
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- // If there are any phi nodes left, they must have a single predecessor.
- while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- PN->eraseFromParent();
- }
- }
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
- // Insert the store of the call site value
- insertCallSiteStore(II, InvokeNo, CallSite);
-
- // Record the call site value for the back end so it stays associated with
- // the invoke.
- CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
-
- // Add a switch case to our unwind block.
- if (BasicBlock *SuccBB = LPadSuccMap[II]) {
- CatchSwitch->addCase(SwitchValC, SuccBB);
- } else {
- CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
- }
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
- // We still want this to look like an invoke so we emit the LSDA properly,
- // so we don't transform the invoke into a call here.
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
}
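
The rewritten insertCallSiteStore builds a GEP with indices {0, 1} into the cached FuncCtx alloca and does a volatile store of the call-site number. A standalone C++ rendering of what that amounts to; the struct layout is purely illustrative, not the real FunctionContextTy:

#include <cstdint>

struct FakeFunctionContext {   // hypothetical stand-in, not the real FunctionContextTy
  void    *Prev;               // field 0
  int32_t  CallSite;           // field 1  <- what GEP indices {0, 1} select
  int32_t  Data[4];            // field 2
};

static void insertCallSiteStore(FakeFunctionContext *FuncCtx, int Number) {
  // Builder.CreateGEP(FuncCtx, {Zero, One}, "call_site") at the C level:
  volatile int32_t *CallSite = &FuncCtx->CallSite;
  // Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/):
  *CallSite = Number;
}

int main() {
  FakeFunctionContext Ctx = {};
  insertCallSiteStore(&Ctx, 1);          // invoke number 1 in this function
  return Ctx.CallSite == 1 ? 0 : 1;
}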
/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
-static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
- if (!LiveBBs.insert(BB).second) return; // already been here.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB)) return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
MarkBlocksLiveIn(*PI, LiveBBs);
}
-/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge
-/// we spill into a stack location, guaranteeing that there is nothing live
-/// across the unwind edge. This process also splits all critical edges
-/// coming out of invoke's.
-/// FIXME: Move this function to a common utility file (Local.cpp?) so
-/// both SjLj and LowerInvoke can use it.
-void SjLjEHPass::
-splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
- // First step, split all critical edges from invoke instructions.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- InvokeInst *II = Invokes[i];
- SplitCriticalEdge(II, 0, this);
-
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- assert(!isa<PHINode>(II->getNormalDest()) &&
- !isa<PHINode>(II->getUnwindDest()) &&
- "Critical edge splitting left single entry phi nodes?");
- }
-
- Function *F = Invokes.back()->getParent()->getParent();
-
- // To avoid having to handle incoming arguments specially, we lower each arg
- // to a copy instruction in the entry block. This ensures that the argument
- // value itself cannot be live across the entry block.
- BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
- while (isa<AllocaInst>(AfterAllocaInsertPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
- ++AfterAllocaInsertPt;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However,
- // we're replacing it here with the same value it was constructed with.
- // We do this because the above replaceAllUsesWith() clobbered the
- // operand, but we want this one to remain.
- NC->setOperand(0, AI);
- }
- }
-
- // Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- // Ignore obvious cases we don't have to handle. In particular, most
- // instructions either have no uses or only have a single use inside the
- // current block. Ignore them quickly.
- Instruction *Inst = II;
- if (Inst->use_empty()) continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
-
- // If this is an alloca in the entry block, it's not a real register
- // value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
- continue;
-
- // Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*,16> Users;
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != BB || isa<PHINode>(User))
- Users.push_back(User);
- }
-
- // Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
- LiveBBs.insert(Inst->getParent());
- while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
-
- if (!isa<PHINode>(U)) {
- MarkBlocksLiveIn(U->getParent(), LiveBBs);
- } else {
- // Uses for a PHI node occur in their predecessor block.
- PHINode *PN = cast<PHINode>(U);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
- MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
- }
- }
-
- // Now that we know all of the blocks that this thing is live in, see if
- // it includes any of the unwind locations.
- bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock))
- NeedsSpill = true;
- }
-
- // If we decided we need a spill, do it.
- // FIXME: Spilling this way is overkill, as it forces all uses of
- // the value to be reloaded from the stack slot, even those that aren't
- // in the unwind blocks. We should be more selective.
- if (NeedsSpill) {
- ++NumSpilled;
- DemoteRegToStack(*Inst, true);
- }
- }
-}
-
-/// CreateLandingPadLoad - Load the exception handling values and insert them
-/// into a structure.
-static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr,
- Value *SelAddr,
- BasicBlock::iterator InsertPt) {
- Value *Exn = new LoadInst(ExnAddr, "exn", false,
- InsertPt);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt);
- Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt);
-
- Ty = StructType::get(Exn->getType(), Sel->getType(), NULL);
- InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty),
- Exn, 0,
- "lpad.val", InsertPt);
- return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt);
-}
-
-/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a
-/// load from the stored values (via CreateLandingPadLoad). This looks through
-/// PHI nodes, and removes them if they are dead.
-static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr,
- Value *SelAddr) {
- if (Inst->use_empty()) return;
-
- while (!Inst->use_empty()) {
- Instruction *I = cast<Instruction>(Inst->use_back());
-
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr);
- if (PN->use_empty()) PN->eraseFromParent();
- continue;
- }
-
- I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I));
- }
-}
-
-bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
- SmallVector<ReturnInst*,16> Returns;
- SmallVector<UnwindInst*,16> Unwinds;
- SmallVector<InvokeInst*,16> Invokes;
-
- // Look through the terminators of the basic blocks to find invokes, returns
- // and unwinds.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- // Remember all return instructions in case we insert an invoke into this
- // function.
- Returns.push_back(RI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Invokes.push_back(II);
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- Unwinds.push_back(UI);
- }
- }
-
- NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
-
- // If we don't have any invokes, there's nothing to do.
- if (Invokes.empty()) return false;
-
- // Find the eh.selector.*, eh.exception and alloca calls.
- //
- // Remember any allocas() that aren't in the entry block, as the
- // jmpbuf saved SP will need to be updated for them.
- //
- // We'll use the first eh.selector to determine the right personality
- // function to use. For SJLJ, we always use the same personality for the
- // whole function, not on a per-selector basis.
- // FIXME: That's a bit ugly. Better way?
- SmallVector<CallInst*,16> EH_Selectors;
- SmallVector<CallInst*,16> EH_Exceptions;
- SmallVector<Instruction*,16> JmpbufUpdatePoints;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- // Note: Skip the entry block since there's nothing there that interests
- // us. eh.selector and eh.exception shouldn't ever be there, and we
- // want to disregard any allocas that are there.
- //
- // FIXME: This is awkward. The new EH scheme won't need to skip the entry
- // block.
- if (BB == F.begin()) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
-
- continue;
- }
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
- EH_Selectors.push_back(CI);
- } else if (CI->getCalledFunction() == ExceptionFn) {
- EH_Exceptions.push_back(CI);
- } else if (CI->getCalledFunction() == StackRestoreFn) {
- JmpbufUpdatePoints.push_back(CI);
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- JmpbufUpdatePoints.push_back(AI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
- }
- }
-
- // If we don't have any eh.selector calls, we can't determine the personality
- // function. Without a personality function, we can't process exceptions.
- if (!PersonalityFn) return false;
-
- // We have invokes, so we need to add register/unregister calls to get this
- // function onto the global unwind stack.
- //
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge we
- // spill into a stack location, guaranteeing that there is nothing live across
- // the unwind edge. This process also splits all critical edges coming out of
- // invoke's.
- splitLiveRangesAcrossInvokes(Invokes);
-
-
- SmallVector<LandingPadInst*, 16> LandingPads;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- LandingPads.push_back(LPI);
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
}
+ if (LPI->getNumUses() == 0) return;
- BasicBlock *EntryBB = F.begin();
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be added to the global context list.
- unsigned Align = 4; // FIXME: Should be a TLI check?
- AllocaInst *FunctionContext =
- new AllocaInst(FunctionContextTy, 0, Align,
- "fcn_context", F.begin()->begin());
-
- Value *Idxs[2];
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- // We need to also keep around a reference to the call_site field
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->data[1]
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exc_selector_gep",
- EntryBB->getTerminator());
- // The exception value comes back in context->data[0]
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The result of the eh.selector call will be replaced with a a reference to
- // the selector value returned in the function context. We leave the selector
- // itself so the EH analysis later can use it.
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
- CallInst *I = EH_Selectors[i];
- Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
- I->replaceAllUsesWith(SelectorVal);
- }
-
- // eh.exception calls are replaced with references to the proper location in
- // the context. Unlike eh.selector, the eh.exception calls are removed
- // entirely.
- for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
- CallInst *I = EH_Exceptions[i];
- // Possible for there to be duplicates, so check to make sure the
- // instruction hasn't already been removed.
- if (!I->getParent()) continue;
- Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
- I->replaceAllUsesWith(Val);
- I->eraseFromParent();
- }
-
- for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
- ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr);
-
- // The entry block changes to have the eh.sjlj.setjmp, with a conditional
- // branch to a dispatch block for non-zero returns. If we return normally,
- // we're not handling an exception and just register the function context and
- // continue.
-
- // Create the dispatch block. The dispatch block is basically a big switch
- // statement that goes to all of the invoke landing pads.
- BasicBlock *DispatchBlock =
- BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
- // Insert a load of the callsite in the dispatch block, and a switch on its
- // value. By default, we issue a trap statement.
- BasicBlock *TrapBlock =
- BasicBlock::Create(F.getContext(), "trapbb", &F);
- CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
- "", TrapBlock);
- new UnreachableInst(F.getContext(), TrapBlock);
-
- Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
- DispatchBlock);
- SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
- DispatchBlock);
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "eh.sjlj.setjmp.cont");
-
- // Populate the Function Context
- // 1. LSDA address
- // 2. Personality function address
- // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
- // LSDA address
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
-
- // Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
- EntryBB->getTerminator());
-
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
- // Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
- EntryBB->getTerminator());
-
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "",
- EntryBB->getTerminator());
-
- // Add a call to dispatch_setup after the setjmp call. This is expanded to any
- // target-specific setup that needs to be done.
- CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
- // check the return value of the setjmp. non-zero goes to dispatcher.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, DispatchVal, Zero,
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
- // Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FunctionContext, "",
- ContBlock->getTerminator());
- Register->setDoesNotThrow();
-
- // At this point, we are all set up, update the invoke instructions to mark
- // their call_site values, and fill in the dispatch switch accordingly.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
- // Mark call instructions that aren't nounwind as no-action (call_site ==
- // -1). Skip the entry block, as prior to then, no function context has been
- // created for this function and any unexpected exceptions thrown will go
- // directly to the caller's context, which is what we want anyway, so no need
- // to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore calls to the EH builtins (eh.selector, eh.exception)
- Constant *Callee = CI->getCalledFunction();
- if (Callee != SelectorFn && Callee != ExceptionFn
- && !CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
- }
- }
-
- // Replace all unwinds with a branch to the unwind handler.
- // ??? Should this ever happen with sjlj exceptions?
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(TrapBlock, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
- // Following any allocas not in the entry block, update the saved SP in the
- // jmpbuf to the new value.
- for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
- Instruction *AI = JmpbufUpdatePoints[i];
- Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(AI);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
- }
-
- // Finally, for any returns from this function, if this function contains an
- // invoke, add a call to unregister the function context.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i)
- CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
-
- return true;
+ LPI->replaceAllUsesWith(LPadVal);
}
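
substituteLPadValues rewires users that only extract element 0 or 1 of the landingpad's {exception, selector} pair directly to the scalar values, and rebuilds an aggregate only if some use of the whole pair remains. A toy sketch of that strategy (not LLVM code, and not LLVM's use-list machinery):

#include <iostream>
#include <string>
#include <vector>

// Toy stand-in for a user of the landingpad's {exn, sel} result.
struct Use { int ExtractedIndex; bool WantsWholePair; };

int main() {
  std::vector<Use> Uses = {{0, false}, {1, false}, {-1, true}};
  std::string ExnVal = "%exn_val", SelVal = "%exn_selector_val";

  bool NeedAggregate = false;
  for (const Use &U : Uses) {
    if (U.WantsWholePair) { NeedAggregate = true; continue; }
    std::cout << "rewire extractvalue " << U.ExtractedIndex << " to "
              << (U.ExtractedIndex == 0 ? ExnVal : SelVal) << '\n';
  }
  if (NeedAggregate)
    std::cout << "still used as a pair: build {" << ExnVal << ", " << SelVal
              << "} and replace the landingpad with it\n";
  return 0;
}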
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPass::
+Value *SjLjEHPrepare::
setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
BasicBlock *EntryBB = F.begin();
@@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// because the value needs to be added to the global context list.
unsigned Align =
TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
- AllocaInst *FuncCtx =
+ FuncCtx =
new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
// Fill in the function context structure.
- Value *Idxs[2];
Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Two = ConstantInt::get(Int32Ty, 2);
+ Value *Three = ConstantInt::get(Int32Ty, 3);
+ Value *Four = ConstantInt::get(Int32Ty, 4);
- // Keep around a reference to the call_site field.
- Idxs[0] = Zero;
- Idxs[1] = One;
- CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // Reference the __data field.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data",
- EntryBB->getTerminator());
-
- // The exception value comes back in context->__data[0].
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->__data[1].
- Idxs[1] = One;
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exn_selector_gep",
- EntryBB->getTerminator());
+ Value *Idxs[2] = { Zero, 0 };
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+ // Reference the __data field.
+ Idxs[1] = Two;
+ Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");
+
+ // The exception values come back in context->__data[0].
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
- LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+ Idxs[1] = One;
+ Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- LPI->replaceAllUsesWith(LPadVal);
+ substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Idxs[1] = Three;
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
Value *PersonalityFieldPtr =
@@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
EntryBB->getTerminator());
// LSDA address
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
- EntryBB->getTerminator());
Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
EntryBB->getTerminator());
+ Idxs[1] = Four;
+ Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
return FuncCtx;
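
The GEPs in setupFunctionContext and insertCallSiteStore address fixed fields of the function context: call_site at index 1, the __data array at index 2 (with the exception value in __data[0] and the selector in __data[1]), the personality function at index 3, and the LSDA address at index 4. A hypothetical C rendering of that layout, only to make the index pairs concrete (the real type is built in doInitialization):

#include <cstdint>

struct FakeSjLjContext {  // illustrative only, not the real FunctionContextTy
  void    *Prev;          // 0
  int32_t  CallSite;      // 1: written by insertCallSiteStore
  int32_t  Data[4];       // 2: Data[0] = exception value, Data[1] = selector
  void    *Personality;   // 3: personality function
  void    *LSDA;          // 4: language-specific data area
  void    *JBuf[5];       // 5: the five-word builtin_setjmp buffer
};

// Roughly what each landing pad now does with the __data field: load the
// exception value and selector back out (volatile loads in the real code).
static void landingPadLoads(const FakeSjLjContext *FnCtx,
                            void *&ExnVal, int32_t &SelVal) {
  ExnVal = reinterpret_cast<void *>(static_cast<std::intptr_t>(FnCtx->Data[0]));
  SelVal = FnCtx->Data[1];
}

int main() {
  FakeSjLjContext Ctx = {};
  void *Exn; int32_t Sel;
  landingPadLoads(&Ctx, Exn, Sel);
  return Sel;   // 0 for the zero-initialized context
}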
@@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
/// specially, we lower each arg to a copy instruction in the entry block. This
/// ensures that the argument value itself cannot be live out of the entry
/// block.
-void SjLjEHPass::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
@@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
-void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst*> Invokes) {
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
for (Function::iterator
BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
@@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
}
// Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
+ SmallPtrSet<BasicBlock*, 64> LiveBBs;
LiveBBs.insert(Inst->getParent());
while (!Users.empty()) {
Instruction *U = Users.back();
@@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ << UnwindBlock->getName() << "\n");
NeedsSpill = true;
+ break;
}
}
@@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
// the value to be reloaded from the stack slot, even those that aren't
// in the unwind blocks. We should be more selective.
if (NeedsSpill) {
- ++NumSpilled;
DemoteRegToStack(*Inst, true);
+ ++NumSpilled;
}
}
}
+
+ // Go through the landing pads and remove any PHIs there.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode*, 8> PHIsToDemote;
+ for (BasicBlock::iterator
+ PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty()) continue;
+
+ // Demote the PHIs to the stack.
+ for (SmallPtrSet<PHINode*, 8>::iterator
+ I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ DemotePHIToStack(*I);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(UnwindBlock->begin());
+ }
}
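
The loop added above first gathers the landing pad's PHIs into a SmallPtrSet and only then demotes them, rather than demoting while walking the block. A minimal standalone sketch of that collect-then-mutate pattern in plain C++ (no LLVM types, names are made up for illustration):

// collect_then_mutate.cpp - gather the nodes to rewrite first, then mutate
// the container, so the scan never walks a list it is simultaneously editing.
#include <iostream>
#include <list>
#include <string>
#include <vector>

struct Inst { std::string name; bool isPHI; };

int main() {
  std::list<Inst> block = {{"phi1", true}, {"phi2", true}, {"landingpad", false}};

  // Phase 1: record the leading PHIs without touching the list.
  std::vector<std::list<Inst>::iterator> phis;
  for (auto it = block.begin(); it != block.end() && it->isPHI; ++it)
    phis.push_back(it);

  // Phase 2: now it is safe to erase; the traversal above is finished.
  for (auto it : phis) {
    std::cout << "demoting " << it->name << "\n";
    block.erase(it); // stands in for DemotePHIToStack plus removal
  }
  std::cout << "block now starts with: " << block.front().name << "\n";
  return 0;
}
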
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
SmallVector<ReturnInst*, 16> Returns;
SmallVector<InvokeInst*, 16> Invokes;
- SmallVector<LandingPadInst*, 16> LPads;
+ SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Invokes.push_back(II);
- LPads.push_back(II->getUnwindDest()->getLandingPadInst());
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Returns.push_back(RI);
}
if (Invokes.empty()) return false;
+ NumInvokes += Invokes.size();
+
lowerIncomingArguments(F);
lowerAcrossUnwindEdges(F, Invokes);
- Value *FuncCtx = setupFunctionContext(F, LPads);
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = F.begin();
Type *Int32Ty = Type::getInt32Ty(F.getContext());
@@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
// At this point, we are all set up, update the invoke instructions to mark
// their call_site values.
for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
- insertCallSiteStore(Invokes[I], I + 1, CallSite);
+ insertCallSiteStore(Invokes[I], I + 1);
ConstantInt *CallSiteNum =
ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
@@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (!CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
+ insertCallSiteStore(CI, -1);
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
+ insertCallSiteStore(RI, -1);
}
// Register the function context and make sure it's known to not throw
@@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
EntryBB->getTerminator());
Register->setDoesNotThrow();
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (BB == F.begin())
+ continue;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
for (unsigned I = 0, E = Returns.size(); I != E; ++I)
@@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
return true;
}
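
The new hunk above re-saves the stack pointer after every alloca outside the entry block so that an unwind through the function context lands on the current stack. A sketch of that walk in isolation, assuming the 3.1-era C++ API used in this patch and the pass members StackAddrFn (llvm.stacksave) and StackPtr referenced above; the helper name is hypothetical, and the real pass also refreshes the slot after llvm.stackrestore calls:

// Hedged sketch, not a drop-in addition to the pass.
#include "llvm/BasicBlock.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"

using namespace llvm;

static void updateSavedSPAfterAllocas(Function &F, Constant *StackAddrFn,
                                      Value *StackPtr) {
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (BB == F.begin())
      continue; // entry-block allocas are covered by the initial save
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
      if (!isa<AllocaInst>(I))
        continue;
      // Re-read the stack pointer and store it into the jmpbuf slot so a
      // longjmp taken after this alloca restores the post-alloca stack.
      Instruction *SP = CallInst::Create(StackAddrFn, "sp");
      SP->insertAfter(I);
      Instruction *Store = new StoreInst(SP, StackPtr, true /*volatile*/);
      Store->insertAfter(SP);
    }
  }
}
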
-bool SjLjEHPass::runOnFunction(Function &F) {
- bool Res = false;
- if (!DisableOldSjLjEH)
- Res = insertSjLjEHSupport(F);
- else
- Res = setupEntryBlockAndCallSites(F);
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
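
substituteLPadValues itself is defined outside this hunk; judging from the code deleted above, the simplest form such a substitution could take is to rebuild the { i8*, i32 } aggregate and replace the landingpad's uses with it. A hedged sketch against the 3.1-era headers; the helper name is invented here, and the real routine may also have to route values through PHI users:

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

static void replaceLandingPadValue(LandingPadInst *LPI, Value *ExnVal,
                                   Value *SelVal, IRBuilder<> &Builder) {
  // Rebuild the {i8*, i32} pair the landingpad instruction used to produce.
  Value *LPadVal = UndefValue::get(LPI->getType());
  LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
  LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
  LPI->replaceAllUsesWith(LPadVal);
}
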
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index ca79cafcf4be..c5bd3a3cae63 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MachineBasicBlock *mbb = &*mbbItr;
// Insert an index for the MBB start.
- SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+ SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block);
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(),
+ SlotIndex::Slot_Block)));
++functionSize;
}
@@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index += SlotIndex::InstrDist));
MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(),
+ SlotIndex::Slot_Block);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
// Sort the Idx2MBBMap
std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
- DEBUG(dump());
+ DEBUG(mf->print(dbgs(), this));
// And we're done!
return false;
@@ -166,7 +168,7 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
if (isValid())
- os << entry().getIndex() << "LudS"[getSlot()];
+ os << entry().getIndex() << "Berd"[getSlot()];
else
os << "invalid";
}
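
The print() change reflects the renamed sub-slots: what used to print as one of "LudS" (load/use/def/store) now prints as one of "Berd" for Block, EarlyClobber, Register and Dead. A standalone model, not LLVM code, of how the tag character is chosen:

// slot_print.cpp - each instruction gets four sub-slots; print() appends the
// matching character from "Berd" to the numeric index.
#include <iostream>

enum Slot { Slot_Block = 0, Slot_EarlyClobber = 1, Slot_Register = 2, Slot_Dead = 3 };

static void printIndex(unsigned Index, Slot S) {
  std::cout << Index << "Berd"[S] << "\n";
}

int main() {
  printIndex(16, Slot_Block);    // 16B - block boundary / live-in slot
  printIndex(16, Slot_Register); // 16r - normal register def/use slot
  printIndex(16, Slot_Dead);     // 16d - where a dead def's range ends
  return 0;
}
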
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b6bbcd7176dd..4cd22eb60f55 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,8 +11,8 @@
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,7 +29,7 @@
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, inline_ };
+ enum SpillerName { trivial, inline_ };
}
static cl::opt<SpillerName>
@@ -37,10 +37,9 @@ spillerOpt("spiller",
cl::desc("Spiller to use: (default: standard)"),
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumVal(standard, "default spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
- cl::init(standard));
+ cl::init(trivial));
// Spiller virtual destructor implementation.
Spiller::~Spiller() {}
@@ -73,8 +72,9 @@ protected:
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
- void trivialSpillEverywhere(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals) {
+ void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+ LiveInterval* li = &LRE.getParent();
+
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -116,17 +116,14 @@ protected:
}
// Create a new vreg & interval for this instr.
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- vrm->assignVirt2StackSlot(newVReg, ss);
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ LiveInterval *newLI = &LRE.create();
newLI->weight = HUGE_VALF;
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(newVReg);
+ mop.setReg(newLI->reg);
if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
mop.setIsKill(true);
}
@@ -136,33 +133,29 @@ protected:
// Insert reload if necessary.
MachineBasicBlock::iterator miItr(mi);
if (hasUse) {
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
tri);
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
- lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, loadInstr);
+ lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
true, ss, trc, tri);
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
- lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, storeInstr);
+ lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
-
- newIntervals.push_back(newLI);
}
}
};
@@ -182,60 +175,20 @@ public:
void spill(LiveRangeEdit &LRE) {
// Ignore spillIs - we don't use it.
- trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
+ trivialSpillEverywhere(LRE);
}
};
} // end anonymous namespace
-namespace {
-
-/// Falls back on LiveIntervals::addIntervalsForSpills.
-class StandardSpiller : public Spiller {
-protected:
- MachineFunction *mf;
- LiveIntervals *lis;
- LiveStacks *lss;
- MachineLoopInfo *loopInfo;
- VirtRegMap *vrm;
-public:
- StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : mf(&mf),
- lis(&pass.getAnalysis<LiveIntervals>()),
- lss(&pass.getAnalysis<LiveStacks>()),
- loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
- vrm(&vrm) {}
-
- /// Falls back on LiveIntervals::addIntervalsForSpills.
- void spill(LiveRangeEdit &LRE) {
- std::vector<LiveInterval*> added =
- lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
- loopInfo, *vrm);
- LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
- added.begin(), added.end());
-
- // Update LiveStacks.
- int SS = vrm->getStackSlot(LRE.getReg());
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
- const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
- LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
- if (!SI.hasAtLeastOneValue())
- SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
- SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
- }
-};
-
-} // end anonymous namespace
+void Spiller::anchor() { }
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
switch (spillerOpt) {
- default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
- case standard: return new StandardSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
+ llvm_unreachable("Invalid spiller optimization");
}
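
The rewritten createSpiller drops the default case and moves the failure marker after a switch that now covers every enumerator, so the compiler's switch-coverage warning can flag any spiller added later without a case. A standalone sketch of the pattern, with a plain assert standing in for llvm_unreachable:

// covered_switch.cpp
#include <cassert>
#include <cstdio>

enum SpillerName { trivial, inline_ };

static const char *describe(SpillerName S) {
  switch (S) {
  case trivial: return "trivial spiller";
  case inline_: return "inline spiller";
  }
  // Only reached if S holds a value outside the enum; with no default case,
  // -Wswitch will warn about any enumerator added later but not handled above.
  assert(false && "Invalid spiller optimization");
  return 0;
}

int main() {
  std::printf("%s\n", describe(trivial));
  return 0;
}
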
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 41f1727da439..b7d5beaab1b2 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -22,6 +22,7 @@ namespace llvm {
/// Implementations are utility classes which insert spill or remat code on
/// demand.
class Spiller {
+ virtual void anchor();
public:
virtual ~Spiller() = 0;
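
The new private anchor() is the usual vtable-anchoring idiom: defining one virtual member out of line pins the class's vtable (and type info) to a single object file instead of emitting a weak copy in every translation unit that uses the class. A minimal standalone sketch of the idiom, not of the Spiller hierarchy itself:

// anchor_idiom.cpp
// Interface.h (sketch)
class Interface {
  virtual void anchor();      // declared here...
public:
  virtual ~Interface() {}
  virtual int run() = 0;
};

// Interface.cpp (sketch)
void Interface::anchor() {}   // ...defined exactly once, pinning the vtable

// Any user translation unit
class Impl : public Interface {
  int run() { return 42; }
};

int main() {
  Impl I;
  Interface &IF = I;
  return IF.run() == 42 ? 0 : 1;
}
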
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 63627800af69..9959f74d5f27 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
// Compute split points on the first call. The pair is independent of the
// current live interval.
if (!LSP.first.isValid()) {
MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
if (FirstTerm == MBB->end())
- LSP.first = LIS.getMBBEndIdx(MBB);
+ LSP.first = MBBEnd;
else
LSP.first = LIS.getInstructionIndex(FirstTerm);
@@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
I != E;) {
--I;
- if (I->getDesc().isCall()) {
+ if (I->isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
@@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
// If CurLI is live into a landing pad successor, move the last split point
// back to the call that may throw.
- if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
- return LSP.second;
- else
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LSP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
return LSP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow splits before the call.
+ return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+ SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+ if (LSP == LIS.getMBBEndIdx(MBB))
+ return MBB->end();
+ return LIS.getInstructionFromIndex(LSP);
}
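
The rewritten tail of computeLastSplitPoint adds one more case: a value that leaves the block but was defined after the call cannot really be live on the exceptional edge, so splitting up to the first terminator remains legal. A standalone model of the decision, with plain integers standing in for SlotIndexes (larger means later in the block; all field names are hypothetical):

// last_split_point.cpp
#include <cstdio>

struct BlockInfo {
  int firstTerm;        // index of the first terminator
  int lastCall;         // index of the last call, or -1 if none
  bool hasLandingPad;   // does a landing-pad successor exist?
  bool liveIntoLPad;    // is the current interval live into that landing pad?
  int leavingValueDef;  // def index of the value live-out of the block
  int blockEnd;         // index just past the last instruction
};

static int lastSplitPoint(const BlockInfo &B) {
  // No EH successor, no call, or not live into the landing pad: the first
  // terminator is the last legal split point.
  if (!B.hasLandingPad || B.lastCall < 0 || !B.liveIntoLPad)
    return B.firstTerm;
  // The live-out value is defined after the call but inside this block, so
  // it cannot actually be live on the exceptional edge.
  if (B.leavingValueDef >= B.lastCall && B.leavingValueDef < B.blockEnd)
    return B.firstTerm;
  // Genuinely live into the landing pad: only allow splits before the call.
  return B.lastCall;
}

int main() {
  BlockInfo B = {90, 50, true, true, 70, 100};
  std::printf("split no later than %d\n", lastSplitPoint(B)); // 90: def after the call
  B.leavingValueDef = 10;
  std::printf("split no later than %d\n", lastSplitPoint(B)); // 50: must stay before the call
  return 0;
}
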
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() {
I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
++I)
if (!I.getOperand().isUndef())
- UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
array_pod_sort(UseSlots.begin(), UseSlots.end());
@@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
- Edit->anyRematerializable(LIS, TII, 0);
+ Edit->anyRematerializable(0);
}
void SplitEditor::dump() const {
@@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
LiveInterval *LI = Edit->get(RegIdx);
// Create a new value.
- VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
@@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
SlotIndex Def = OldVNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
// No longer a simple mapping. Switch to a complex, non-forced mapping.
InsP.first->second = ValueForcePair();
}
// This is a complex mapping, add liveness for VNI
SlotIndex Def = VNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
SlotIndex Def = VNI->def;
- Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
// Mark as complex mapped, forced.
VFP = ValueForcePair(0, true);
}
@@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
// Attempt cheap-as-a-copy rematerialization.
LiveRangeEdit::Remat RM(ParentVNI);
- if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
- .getDefIndex();
+ .getRegSlot();
++NumCopies;
}
// Define the value in Reg.
- VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
- VNI->setCopy(CopyMI);
- return VNI;
+ return defValue(RegIdx, ParentVNI, Def);
}
/// Create a new virtual register and live interval.
unsigned SplitEditor::openIntv() {
// Create the complement as index 0.
if (Edit->empty())
- Edit->create(LIS, VRM);
+ Edit->create();
// Create the open interval.
OpenIdx = Edit->size();
- Edit->create(LIS, VRM);
+ Edit->create();
return OpenIdx;
}
@@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
}
DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
- LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ SA.getLastSplitPointIter(&MBB));
RegAssign.insert(VNI->def, End, OpenIdx);
DEBUG(dump());
return VNI->def;
@@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before overlapIntv");
const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
- assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
"Parent changes value in extended range");
assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
"Range cannot span basic blocks");
@@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
} else {
- SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex();
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
@@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() {
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
- LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def;
+ SA.getLastSplitPointIter(Dom.first))->def;
}
// Remove redundant back-copies that are now known to be dominated by another
@@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// use the same register as the def, so just do that always.
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (MO.isDef() || MO.isUndef())
- Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
@@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (!Edit->getParent().liveAt(Idx))
continue;
} else
- Idx = Idx.getUseIndex();
+ Idx = Idx.getRegSlot(true);
getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
&MDT, &LIS.getVNInfoAllocator());
@@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() {
LiveInterval *LI = *I;
for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
LII != LIE; ++LII) {
- // Dead defs end at the store slot.
- if (LII->end != LII->valno->def.getNextSlot())
+ // Dead defs end at the dead slot.
+ if (LII->end != LII->valno->def.getDeadSlot())
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
assert(MI && "Missing instruction for dead def");
@@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(Dead);
}
void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
@@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
VNI->setIsPHIDef(ParentVNI->isPHIDef());
- VNI->setCopy(ParentVNI->getCopy());
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
@@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
break;
case SM_Speed:
llvm_unreachable("Spill mode 'speed' not implemented yet");
- break;
}
// Transfer the simply mapped values, check if any are skipped.
@@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
SmallVector<LiveInterval*, 8> dups;
dups.push_back(li);
for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->create(LIS, VRM));
+ dups.push_back(&Edit->create());
ConEQ.Distribute(&dups[0], MRI);
// The new intervals all map back to i.
if (LRMap)
@@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
}
// Calculate spill weight and allocation hints for new intervals.
- Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
assert(!LRMap || LRMap->size() == Edit->size());
}
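
Several hunks above replace getNextSlot() with getDeadSlot() when adding the one-slot range for a value with no uses, and deleteRematVictims now recognises dead defs by that end point: a dead def's range ends at the Dead slot of its defining instruction, and the new accessor names that slot directly instead of relying on "the slot after the def" happening to be it. A standalone model of the idea (simplified slot numbering, not LLVM's):

// dead_slot.cpp
#include <cstdio>

enum Slot { Slot_Block = 0, Slot_EarlyClobber = 1, Slot_Register = 2, Slot_Dead = 3 };
static const int SlotsPerInstr = 4;

static int regSlot(int Instr)  { return Instr * SlotsPerInstr + Slot_Register; }
static int deadSlot(int Instr) { return Instr * SlotsPerInstr + Slot_Dead; }

// Dead slot of whatever instruction the given slot index belongs to.
static int deadSlotOf(int SlotIdx) {
  return (SlotIdx / SlotsPerInstr) * SlotsPerInstr + Slot_Dead;
}

struct LiveRange { int start, end; }; // half-open [start, end)

// Mirrors the new deleteRematVictims test: dead defs end at the dead slot.
static bool isDeadDef(const LiveRange &LR) { return LR.end == deadSlotOf(LR.start); }

int main() {
  LiveRange Dead = { regSlot(7), deadSlot(7) }; // defined at instr 7, never used
  LiveRange Live = { regSlot(7), regSlot(9) };  // defined at instr 7, read at instr 9
  std::printf("first range dead? %d, second range dead? %d\n",
              isDeadDef(Dead), isDeadDef(Live));
  return 0;
}
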
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index d8fc2122a3c7..4005a3d5cbbf 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -46,9 +46,6 @@ public:
const MachineLoopInfo &Loops;
const TargetInstrInfo &TII;
- // Sorted slot indexes of using instructions.
- SmallVector<SlotIndex, 8> UseSlots;
-
/// Additional information about basic blocks where the current variable is
/// live. Such a block will look like one of these templates:
///
@@ -85,6 +82,9 @@ private:
// Current live interval.
const LiveInterval *CurLI;
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
/// LastSplitPoint - Last legal split point in each basic block in the current
/// function. The first entry is the first terminator, the second entry is the
/// last valid split point for a variable that is live in to a landing pad
@@ -135,7 +135,7 @@ public:
/// getParent - Return the last analyzed interval.
const LiveInterval &getParent() const { return *CurLI; }
- /// getLastSplitPoint - Return that base index of the last valid split point
+ /// getLastSplitPoint - Return the base index of the last valid split point
/// in the basic block numbered Num.
SlotIndex getLastSplitPoint(unsigned Num) {
// Inline the common simple case.
@@ -145,6 +145,9 @@ public:
return computeLastSplitPoint(Num);
}
+ /// getLastSplitPointIter - Returns the last split point as an iterator.
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
/// isOriginalEndpoint - Return true if the original live range was killed or
/// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
/// and 'use' for an early-clobber def.
@@ -152,6 +155,10 @@ public:
/// splitting.
bool isOriginalEndpoint(SlotIndex Idx) const;
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+ /// This include both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
/// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
/// where CurLI has uses.
ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
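
UseSlots moves behind the class and is handed out through getUseSlots() as an ArrayRef, so callers can read the sorted slots but not resize or reorder them. A small standalone sketch of that accessor pattern, assuming LLVM's ADT headers are available; the class and method names here are illustrative only:

// use_slots_view.cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

class UseIndexCache {
  llvm::SmallVector<unsigned, 8> UseSlots; // kept sorted by the owner

public:
  void record(unsigned Idx) { UseSlots.push_back(Idx); }

  // Read-only view; callers cannot mutate the underlying buffer, which
  // preserves the sortedness invariant the analysis relies on.
  llvm::ArrayRef<unsigned> getUseSlots() const { return UseSlots; }
};

int main() {
  UseIndexCache C;
  C.record(4);
  C.record(12);
  return C.getUseSlots().size() == 2 ? 0 : 1;
}
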
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
deleted file mode 100644
index 77973b72bbc8..000000000000
--- a/lib/CodeGen/Splitter.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loopsplitter"
-
-#include "Splitter.h"
-
-#include "llvm/Module.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-char LoopSplitter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-
-namespace llvm {
-
- class StartSlotComparator {
- public:
- StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
- bool operator()(const MachineBasicBlock *mbb1,
- const MachineBasicBlock *mbb2) const {
- return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
- }
- private:
- LiveIntervals &lis;
- };
-
- class LoopSplit {
- public:
- LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
- : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Cannot split physical registers.");
- }
-
- LiveInterval& getLI() const { return li; }
-
- MachineLoop& getLoop() const { return loop; }
-
- bool isValid() const { return valid; }
-
- bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
-
- void invalidate() { valid = false; }
-
- void splitIncoming() { inSplit = true; }
-
- void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
-
- void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
-
- void apply() {
- assert(valid && "Attempt to apply invalid split.");
- applyIncoming();
- applyOutgoing();
- copyRanges();
- renameInside();
- }
-
- private:
- LoopSplitter &ls;
- LiveInterval &li;
- MachineLoop &loop;
- bool valid, inSplit;
- std::set<MachineLoop::Edge> outSplits;
- std::vector<MachineInstr*> loopInstrs;
-
- LiveInterval *newLI;
- std::map<VNInfo*, VNInfo*> vniMap;
-
- LiveInterval* getNewLI() {
- if (newLI == 0) {
- const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
- unsigned vreg = ls.mri->createVirtualRegister(trc);
- newLI = &ls.lis->getOrCreateInterval(vreg);
- }
- return newLI;
- }
-
- VNInfo* getNewVNI(VNInfo *oldVNI) {
- VNInfo *newVNI = vniMap[oldVNI];
-
- if (newVNI == 0) {
- newVNI = getNewLI()->createValueCopy(oldVNI,
- ls.lis->getVNInfoAllocator());
- vniMap[oldVNI] = newVNI;
- }
-
- return newVNI;
- }
-
- void applyIncoming() {
- if (!inSplit) {
- return;
- }
-
- MachineBasicBlock *preHeader = loop.getLoopPreheader();
- if (preHeader == 0) {
- assert(ls.canInsertPreHeader(loop) &&
- "Can't insert required preheader.");
- preHeader = &ls.insertPreHeader(loop);
- }
-
- LiveRange *preHeaderRange =
- ls.lis->findExitingRange(li, preHeader);
- assert(preHeaderRange != 0 && "Range not live into preheader.");
-
- // Insert the new copy.
- MachineInstr *copy = BuildMI(*preHeader,
- preHeader->getFirstTerminator(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(getNewLI()->reg, RegState::Define)
- .addReg(li.reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- VNInfo *newVal = getNewVNI(preHeaderRange->valno);
- newVal->def = copyDefIdx;
- newVal->setCopy(copy);
- li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
-
- getNewLI()->addRange(LiveRange(copyDefIdx,
- ls.lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
- void applyOutgoing() {
-
- for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
- osEnd = outSplits.end();
- osItr != osEnd; ++osItr) {
- MachineLoop::Edge edge = *osItr;
- MachineBasicBlock *outBlock = edge.second;
- if (ls.isCriticalEdge(edge)) {
- assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
- outBlock = &ls.splitEdge(edge, loop);
- }
- LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
- assert(outRange != 0 && "No exiting range?");
-
- MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(li.reg, RegState::Define)
- .addReg(getNewLI()->reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- // Blow away output range definition.
- outRange->valno->def = ls.lis->getInvalidIndex();
- li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
-
- SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
- assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal =
- getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
-
- getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
- copyDefIdx, newVal));
-
- }
- }
-
- void copyRange(LiveRange &lr) {
- std::pair<bool, LoopSplitter::SlotPair> lsr =
- ls.getLoopSubRange(lr, loop);
-
- if (!lsr.first)
- return;
-
- LiveRange loopRange(lsr.second.first, lsr.second.second,
- getNewVNI(lr.valno));
-
- li.removeRange(loopRange.start, loopRange.end, true);
-
- getNewLI()->addRange(loopRange);
- }
-
- void copyRanges() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
- if (instr.modifiesRegister(li.reg, 0)) {
- LiveRange *defRange =
- li.getLiveRangeContaining(instrIdx.getDefIndex());
- if (defRange != 0) // May have caught this already.
- copyRange(*defRange);
- }
- if (instr.readsRegister(li.reg, 0)) {
- LiveRange *useRange =
- li.getLiveRangeContaining(instrIdx.getUseIndex());
- if (useRange != 0) { // May have caught this already.
- copyRange(*useRange);
- }
- }
- }
-
- for (MachineLoop::block_iterator bbItr = loop.block_begin(),
- bbEnd = loop.block_end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock &loopBlock = **bbItr;
- LiveRange *enteringRange =
- ls.lis->findEnteringRange(li, &loopBlock);
- if (enteringRange != 0) {
- copyRange(*enteringRange);
- }
- }
- }
-
- void renameInside() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
- MachineOperand &mop = instr.getOperand(i);
- if (mop.isReg() && mop.getReg() == li.reg) {
- mop.setReg(getNewLI()->reg);
- }
- }
- }
- }
-
- };
-
- void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addPreservedID(RegisterCoalescerPassID);
- au.addPreserved<CalculateSpillWeights>();
- au.addPreserved<LiveStacks>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
- sis = &getAnalysis<SlotIndexes>();
- lis = &getAnalysis<LiveIntervals>();
- mli = &getAnalysis<MachineLoopInfo>();
- mdt = &getAnalysis<MachineDominatorTree>();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
-
- dumpOddTerminators();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
-// std::deque<MachineLoop*> loops;
-// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-// dbgs() << "Loops:\n";
-// while (!loops.empty()) {
-// MachineLoop &loop = *loops.front();
-// loops.pop_front();
-// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
-
-// dumpLoopInfo(loop);
-// }
-
- //lis->dump();
- //exit(0);
-
- // Setup initial intervals.
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
- !lis->intervalIsInOneMBB(*li)) {
- intervals.push_back(li);
- }
- }
-
- processIntervals();
-
- intervals.clear();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
- dumpOddTerminators();
-
- //exit(1);
-
- return false;
- }
-
- void LoopSplitter::releaseMemory() {
- fqn.clear();
- intervals.clear();
- loopRangeMap.clear();
- }
-
- void LoopSplitter::dumpOddTerminators() {
- for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock *mbb = &*bbItr;
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- if (tii->AnalyzeBranch(*mbb, a, b, c)) {
- dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
- dbgs() << " Terminators:\n";
- for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
- MachineInstr *instr= &*iItr;
- dbgs() << " " << *instr << "";
- }
- dbgs() << "\n Listed successors: [ ";
- for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock *succMBB = *sItr;
- dbgs() << succMBB->getNumber() << " ";
- }
- dbgs() << "]\n\n";
- }
- }
- }
-
- void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
- MachineBasicBlock &headerBlock = *loop.getHeader();
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- loop.getExitEdges(exitEdges);
-
- dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
- for (std::vector<MachineBasicBlock*>::const_iterator
- subBlockItr = loop.getBlocks().begin(),
- subBlockEnd = loop.getBlocks().end();
- subBlockItr != subBlockEnd; ++subBlockItr) {
- MachineBasicBlock &subBlock = **subBlockItr;
- dbgs() << "BB#" << subBlock.getNumber() << " ";
- }
- dbgs() << "], Exit edges: [ ";
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge &exitEdge = *exitEdgeItr;
- dbgs() << "(MBB#" << exitEdge.first->getNumber()
- << ", MBB#" << exitEdge.second->getNumber() << ") ";
- }
- dbgs() << "], Sub-Loop Headers: [ ";
- for (MachineLoop::iterator subLoopItr = loop.begin(),
- subLoopEnd = loop.end();
- subLoopItr != subLoopEnd; ++subLoopItr) {
- MachineLoop &subLoop = **subLoopItr;
- MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
- dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
- mbb.updateTerminator();
-
- for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
- miItr != miEnd; ++miItr) {
- if (lis->isNotInMIMap(miItr)) {
- lis->InsertMachineInstrInMaps(miItr);
- }
- }
- }
-
- bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
- MachineBasicBlock *header = loop.getHeader();
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
-
- for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
- pbEnd = header->pred_end();
- pbItr != pbEnd; ++pbItr) {
- MachineBasicBlock *predBlock = *pbItr;
- if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
- return false;
- }
- }
-
- MachineFunction::iterator headerItr(header);
- if (headerItr == mf->begin())
- return true;
- MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
- assert(headerLayoutPred != 0 && "Header should have layout pred.");
-
- return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
- assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
-
- MachineBasicBlock &header = *loop.getHeader();
-
- // Save the preds - we'll need to update them once we insert the preheader.
- typedef std::set<MachineBasicBlock*> HeaderPreds;
- HeaderPreds headerPreds;
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (!loop.contains(*predItr))
- headerPreds.insert(*predItr);
- }
-
- assert(!headerPreds.empty() && "No predecessors for header?");
-
- //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
-
- MachineBasicBlock *preHeader =
- mf->CreateMachineBasicBlock(header.getBasicBlock());
-
- assert(preHeader != 0 && "Failed to create pre-header.");
-
- mf->insert(header, preHeader);
-
- for (HeaderPreds::iterator hpItr = headerPreds.begin(),
- hpEnd = headerPreds.end();
- hpItr != hpEnd; ++hpItr) {
- assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
- MachineBasicBlock &hp = **hpItr;
- hp.ReplaceUsesOfBlockWith(&header, preHeader);
- }
- preHeader->addSuccessor(&header);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(preHeader));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(preHeader);
-
- if (MachineLoop *parentLoop = loop.getParentLoop()) {
- assert(parentLoop->getHeader() != loop.getHeader() &&
- "Parent loop has same header?");
- parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (parentLoop != 0) {
- loopRangeMap.erase(parentLoop);
- parentLoop = parentLoop->getParentLoop();
- }
- }
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
-
- // Is this safe for physregs?
- // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
- if (!lis->isLiveInToMBB(li, &header))
- continue;
-
- if (lis->isLiveInToMBB(li, preHeader)) {
- assert(lis->isLiveOutOfMBB(li, preHeader) &&
- "Range terminates in newly added preheader?");
- continue;
- }
-
- bool insertRange = false;
-
- for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
- predEnd = preHeader->pred_end();
- predItr != predEnd; ++predItr) {
- MachineBasicBlock *predMBB = *predItr;
- if (lis->isLiveOutOfMBB(li, predMBB)) {
- insertRange = true;
- break;
- }
- }
-
- if (!insertRange)
- continue;
-
- SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
- lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
-
- //dbgs() << "Dumping SlotIndexes:\n";
- //sis->dump();
-
- //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
-
- return *preHeader;
- }
-
- bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
- assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
- if (edge.second->pred_size() > 1)
- return true;
- return false;
- }
-
- bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
- MachineFunction::iterator outBlockItr(edge.second);
- if (outBlockItr == mf->begin())
- return true;
- MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
- assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
- !tii->AnalyzeBranch(*edge.first, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
- MachineLoop &loop) {
-
- MachineBasicBlock &inBlock = *edge.first;
- MachineBasicBlock &outBlock = *edge.second;
-
- assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
- "Splitting non-critical edge?");
-
- //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
- // << " -> MBB#" << outBlock.getNumber() << ")...";
-
- MachineBasicBlock *splitBlock =
- mf->CreateMachineBasicBlock();
-
- assert(splitBlock != 0 && "Failed to create split block.");
-
- mf->insert(&outBlock, splitBlock);
-
- inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
- splitBlock->addSuccessor(&outBlock);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(splitBlock));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(splitBlock);
-
- loopRangeMap.erase(&loop);
-
- MachineLoop *splitParentLoop = loop.getParentLoop();
- while (splitParentLoop != 0 &&
- !splitParentLoop->contains(&outBlock)) {
- splitParentLoop = splitParentLoop->getParentLoop();
- }
-
- if (splitParentLoop != 0) {
- assert(splitParentLoop->contains(&loop) &&
- "Split-block parent doesn't contain original loop?");
- splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (splitParentLoop != 0) {
- loopRangeMap.erase(splitParentLoop);
- splitParentLoop = splitParentLoop->getParentLoop();
- }
- }
-
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
- bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
- lis->isLiveInToMBB(li, &outBlock);
- if (lis->isLiveInToMBB(li, splitBlock)) {
- if (!intersects) {
- li.removeRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock), true);
- }
- } else if (intersects) {
- SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0,
- lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock),
- newVal));
- }
- }
-
- //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
-
- return *splitBlock;
- }
-
- LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
- typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
- LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
- if (lrItr == loopRangeMap.end()) {
- LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
- std::copy(loop.block_begin(), loop.block_end(),
- std::inserter(loopMBBs, loopMBBs.begin()));
-
- assert(!loopMBBs.empty() && "No blocks in loop?");
-
- LoopRanges &loopRanges = loopRangeMap[&loop];
- assert(loopRanges.empty() && "Loop encountered but not processed?");
- SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
- loopRanges.push_back(
- std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
- lis->getInvalidIndex()));
- for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
- curBlockEnd = loopMBBs.end();
- curBlockItr != curBlockEnd; ++curBlockItr) {
- SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
- if (newStart != oldEnd) {
- loopRanges.back().second = oldEnd;
- loopRanges.push_back(std::make_pair(newStart,
- lis->getInvalidIndex()));
- }
- oldEnd = lis->getMBBEndIdx(*curBlockItr);
- }
-
- loopRanges.back().second =
- lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
-
- return loopRanges;
- }
- return lrItr->second;
- }
-
- std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
- const LiveRange &lr,
- MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- LoopRanges::iterator lrItr = loopRanges.begin(),
- lrEnd = loopRanges.end();
- while (lrItr != lrEnd && lr.start >= lrItr->second) {
- ++lrItr;
- }
-
- if (lrItr == lrEnd) {
- SlotIndex invalid = lis->getInvalidIndex();
- return std::make_pair(false, SlotPair(invalid, invalid));
- }
-
- SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
- SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
-
- return std::make_pair(true, SlotPair(srStart, srEnd));
- }
-
- void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
- for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
- lrItr != lrEnd; ++lrItr) {
- dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::processHeader(LoopSplit &split) {
- MachineBasicBlock &header = *split.getLoop().getHeader();
- //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
-
- if (!lis->isLiveInToMBB(split.getLI(), &header))
- return; // Not live in, but nothing wrong so far.
-
- MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
- if (!preHeader) {
-
- if (!canInsertPreHeader(split.getLoop())) {
- split.invalidate();
- return; // Couldn't insert a pre-header. Bail on this interval.
- }
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
- split.splitIncoming();
- break;
- }
- }
- } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
- split.splitIncoming();
- }
- }
-
- void LoopSplitter::processLoopExits(LoopSplit &split) {
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- split.getLoop().getExitEdges(exitEdges);
-
- //dbgs() << " Processing loop exits:\n";
-
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge exitEdge = *exitEdgeItr;
-
- LiveRange *outRange =
- split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
-
- if (outRange != 0) {
- if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
- split.invalidate();
- return;
- }
-
- split.splitOutgoing(exitEdge);
- }
- }
- }
-
- void LoopSplitter::processLoopUses(LoopSplit &split) {
- std::set<MachineInstr*> processed;
-
- for (MachineRegisterInfo::reg_iterator
- rItr = mri->reg_begin(split.getLI().reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- MachineInstr &instr = *rItr;
- if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
- split.addLoopInstr(&instr);
- processed.insert(&instr);
- }
- }
-
- //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
- // << " in blocks [ ";
- //dbgs() << "]\n";
- }
-
- bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Attempt to split physical register.");
-
- LoopSplit split(*this, li, loop);
- processHeader(split);
- if (split.isValid())
- processLoopExits(split);
- if (split.isValid())
- processLoopUses(split);
- if (split.isValid() /* && split.isWorthwhile() */) {
- split.apply();
- DEBUG(dbgs() << "Success.\n");
- return true;
- }
- DEBUG(dbgs() << "Failed.\n");
- return false;
- }
-
- void LoopSplitter::processInterval(LiveInterval &li) {
- std::deque<MachineLoop*> loops;
- std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-
- while (!loops.empty()) {
- MachineLoop &loop = *loops.front();
- loops.pop_front();
- DEBUG(
- dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
- << loop.getHeader()->getNumber() << " ";
- );
- if (!splitOverLoop(li, loop)) {
- // Couldn't split over outer loop, schedule sub-loops to be checked.
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
- }
- }
- }
-
- void LoopSplitter::processIntervals() {
- while (!intervals.empty()) {
- LiveInterval &li = *intervals.front();
- intervals.pop_front();
-
- assert(!lis->intervalIsInOneMBB(li) &&
- "Single interval in process worklist.");
-
- processInterval(li);
- }
- }
-
-}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
deleted file mode 100644
index 9fb1b8b30139..000000000000
--- a/lib/CodeGen/Splitter.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SPLITTER_H
-#define LLVM_CODEGEN_SPLITTER_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- struct LiveRange;
- class LoopSplit;
- class MachineDominatorTree;
- class MachineRegisterInfo;
- class SlotIndexes;
- class TargetInstrInfo;
- class VNInfo;
-
- class LoopSplitter : public MachineFunctionPass {
- friend class LoopSplit;
- public:
- static char ID;
-
- LoopSplitter() : MachineFunctionPass(ID) {
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
-
- private:
-
- MachineFunction *mf;
- LiveIntervals *lis;
- MachineLoopInfo *mli;
- MachineRegisterInfo *mri;
- MachineDominatorTree *mdt;
- SlotIndexes *sis;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- std::string fqn;
- std::deque<LiveInterval*> intervals;
-
- typedef std::pair<SlotIndex, SlotIndex> SlotPair;
- typedef std::vector<SlotPair> LoopRanges;
- typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
- LoopRangeMap loopRangeMap;
-
- void dumpLoopInfo(MachineLoop &loop);
-
- void dumpOddTerminators();
-
- void updateTerminators(MachineBasicBlock &mbb);
-
- bool canInsertPreHeader(MachineLoop &loop);
- MachineBasicBlock& insertPreHeader(MachineLoop &loop);
-
- bool isCriticalEdge(MachineLoop::Edge &edge);
- bool canSplitEdge(MachineLoop::Edge &edge);
- MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
-
- LoopRanges& getLoopRanges(MachineLoop &loop);
- std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
- MachineLoop &loop);
-
- void dumpLoopRanges(MachineLoop &loop);
-
- void processHeader(LoopSplit &split);
- void processLoopExits(LoopSplit &split);
- void processLoopUses(LoopSplit &split);
-
- bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
-
- void processInterval(LiveInterval &li);
-
- void processIntervals();
- };
-
-}
-
-#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 1f0e5a2711ae..43a6ad8c97a4 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
- // We apparently only care about character arrays.
- if (!AT->getElementType()->isIntegerTy(8))
- continue;
-
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
// If an array has more than SSPBufferSize bytes of allocated space,
// then we emit stack protectors.
if (SSPBufferSize <= TD->getTypeAllocSize(AT))
return true;
- }
}
}
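
After this hunk any sufficiently large array triggers a stack protector, not just i8 arrays. A standalone model of the relaxed predicate; the 8-byte figure stands in for the usual default of -stack-protector-buffer-size at this point and is passed in explicitly rather than read from the option:

// ssp_check.cpp
#include <cstdio>

// New rule: the element type is irrelevant, only the allocated size of the
// array is compared against the SSP buffer-size threshold.
static bool arrayTriggersProtector(unsigned long long allocSizeInBytes,
                                   unsigned sspBufferSize) {
  return sspBufferSize <= allocSizeInBytes;
}

int main() {
  const unsigned Threshold = 8; // stands in for -stack-protector-buffer-size
  // Under the old rule only the i8 case below could trigger a protector;
  // now the i32 array qualifies too, because its 64 bytes reach the limit.
  std::printf("char buf[16] -> %d\n", arrayTriggersProtector(16, Threshold));
  std::printf("int  buf[16] -> %d\n", arrayTriggersProtector(64, Threshold));
  std::printf("char buf[4]  -> %d\n", arrayTriggersProtector(4, Threshold));
  return 0;
}
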
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 57cbe1ba5960..1e940b1d0711 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
-#include "VirtRegMap.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing",
cl::init(false), cl::Hidden,
cl::desc("Suppress slot sharing during stack coloring"));
-static cl::opt<bool>
-ColorWithRegsOpt("color-ss-with-regs",
- cl::init(false), cl::Hidden,
- cl::desc("Color stack slots with free registers"));
-
-
static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
-STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
-STATISTIC(NumLoadElim, "Number of loads eliminated");
-STATISTIC(NumStoreElim, "Number of stores eliminated");
STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
bool ColorWithRegs;
LiveStacks* LS;
- VirtRegMap* VRM;
MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
const MachineLoopInfo *loopInfo;
// SSIntervals - Spill slot intervals.
@@ -98,18 +85,12 @@ namespace {
MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
- StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
- initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
- }
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveStacks>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
@@ -117,9 +98,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char* getPassName() const {
- return "Stack Slot Coloring";
- }
private:
void InitializeSlots();
@@ -127,41 +105,23 @@ namespace {
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
- bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg);
void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
MachineFunction &MF);
- bool PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- bool PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg, const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF);
- bool AllMemRefsCanBeUnfolded(int SS);
bool RemoveDeadStores(MachineBasicBlock* MBB);
};
} // end anonymous namespace
char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
-FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
- return new StackSlotColoring(RegColor);
-}
-
namespace {
// IntervalSorter - Comparison predicate that sort live intervals by
// their weight.
@@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
return false;
}
-/// ColorSlotsWithFreeRegs - If there are any free registers available, try
-/// replacing spill slots references with registers instead.
-bool
-StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg) {
- if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
- return false;
-
- bool Changed = false;
- DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
- if (!UsedColors[SS] || li->weight < 20)
- // If the weight is < 20, i.e. two references in a loop with depth 1,
- // don't bother with it.
- continue;
-
- // These slots allow to share the same registers.
- bool AllColored = true;
- SmallVector<unsigned, 4> ColoredRegs;
- for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
- int RSS = RevMap[SS][j];
- const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
- // If it's not colored to another stack slot, try coloring it
- // to a "free" register.
- if (!RC) {
- AllColored = false;
- continue;
- }
- unsigned Reg = VRM->getFirstUnusedRegister(RC);
- if (!Reg) {
- AllColored = false;
- continue;
- }
- if (!AllMemRefsCanBeUnfolded(RSS)) {
- AllColored = false;
- continue;
- } else {
- DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
- << TRI->getName(Reg) << '\n');
- ColoredRegs.push_back(Reg);
- SlotMapping[RSS] = Reg;
- SlotIsReg.set(RSS);
- Changed = true;
- }
- }
-
- // Register and its sub-registers are no longer free.
- while (!ColoredRegs.empty()) {
- unsigned Reg = ColoredRegs.back();
- ColoredRegs.pop_back();
- VRM->setRegisterUsed(Reg);
- // If reg is a callee-saved register, it will have to be spilled in
- // the prologue.
- MRI->setPhysRegUsed(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- VRM->setRegisterUsed(*AS);
- MRI->setPhysRegUsed(*AS);
- }
- }
- // This spill slot is dead after the rewrites
- if (AllColored) {
- MFI->RemoveStackObject(SS);
- ++NumEliminated;
- }
- }
- DEBUG(dbgs() << '\n');
-
- return Changed;
-}
-
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
///
int StackSlotColoring::ColorSlot(LiveInterval *li) {
@@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SmallVector<int, 16> SlotMapping(NumObjs, -1);
SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
- BitVector SlotIsReg(NumObjs);
BitVector UsedColors(NumObjs);
DEBUG(dbgs() << "Color spill slot intervals:\n");
@@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
DEBUG(dbgs() << '\n');
#endif
- // Can we "color" a stack slot with a unused register?
- Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
-
if (!Changed)
return false;
// Rewrite all MO_FrameIndex operands.
SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
- bool isReg = SlotIsReg[SS];
int NewFI = SlotMapping[SS];
- if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
- if (!isReg)
- RewriteInstruction(RefMIs[i], SS, NewFI, MF);
- else {
- // Rewrite to use a register instead.
- unsigned MBBId = RefMIs[i]->getParent()->getNumber();
- SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
- UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
- }
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
}
// Delete unused stack slots.
@@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
return true;
}
-/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
-/// spill slot index can be unfolded.
-bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
- SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
- for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
- MachineInstr *MI = RefMIs[i];
- if (TII->isLoadFromStackSlot(MI, SS) ||
- TII->isStoreToStackSlot(MI, SS))
- // Restore and spill will become copies.
- return true;
- if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
- return false;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (MO.isFI() && MO.getIndex() != SS)
- // If it uses another frameindex, we can, currently* unfold it.
- return false;
- }
- }
- return true;
-}
-
/// RewriteInstruction - Rewrite specified instruction by replacing references
/// to old frame index with new one.
void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
@@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
(*I)->setValue(NewSV);
}
-/// PropagateBackward - Traverse backward and look for the definition of
-/// OldReg. If it can successfully update all of the references with NewReg,
-/// do so and return true.
-bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->begin())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- SmallVector<MachineOperand*, 4> Refs;
- while (--MII != MBB->begin()) {
- bool FoundDef = false; // Not counting 2address def.
-
- Uses.clear();
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isImplicit())
- return false;
-
- // Abort the use is actually a sub-register def. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isSubregToReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
-
- if (MO.isUse()) {
- Uses.push_back(&MO);
- } else {
- Refs.push_back(&MO);
- if (!MII->isRegTiedToUseOperand(i))
- FoundDef = true;
- }
- } else if (TRI->regsOverlap(Reg, NewReg)) {
- return false;
- } else if (TRI->regsOverlap(Reg, OldReg)) {
- if (!MO.isUse() || !MO.isKill())
- return false;
- }
- }
-
- if (FoundDef) {
- // Found non-two-address def. Stop here.
- for (unsigned i = 0, e = Refs.size(); i != e; ++i)
- Refs[i]->setReg(NewReg);
- return true;
- }
-
- // Two-address uses must be updated as well.
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Refs.push_back(Uses[i]);
- }
- return false;
-}
-
-/// PropagateForward - Traverse forward and look for the kill of OldReg. If
-/// it can successfully update all of the uses with NewReg, do so and
-/// return true.
-bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->end())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- while (++MII != MBB->end()) {
- bool FoundKill = false;
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isDef() || MO.isImplicit())
- return false;
-
- // Abort the use is actually a sub-register use. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
- if (MO.isKill())
- FoundKill = true;
-
- Uses.push_back(&MO);
- } else if (TRI->regsOverlap(Reg, NewReg) ||
- TRI->regsOverlap(Reg, OldReg))
- return false;
- }
- if (FoundKill) {
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Uses[i]->setReg(NewReg);
- return true;
- }
- }
- return false;
-}
-
-/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
-/// folded memory references and replacing those references with register
-/// references instead.
-void
-StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg,
- const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF) {
- MachineBasicBlock *MBB = MI->getParent();
- if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
- if (PropagateForward(MI, MBB, DstReg, Reg)) {
- DEBUG(dbgs() << "Eliminated load: ");
- DEBUG(MI->dump());
- ++NumLoadElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
- DstReg).addReg(Reg);
- ++NumRegRepl;
- }
-
- if (!Defs.count(Reg)) {
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
- if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
- DEBUG(dbgs() << "Eliminated store: ");
- DEBUG(MI->dump());
- ++NumStoreElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(SrcReg);
- ++NumRegRepl;
- }
-
- // Remember reg has been defined in MBB.
- Defs.insert(Reg);
- } else {
- SmallVector<MachineInstr*, 4> NewMIs;
- bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- (void)Success; // Silence compiler warning.
- assert(Success && "Failed to unfold!");
- MachineInstr *NewMI = NewMIs[0];
- MBB->insert(MI, NewMI);
- ++NumRegRepl;
-
- if (NewMI->readsRegister(Reg)) {
- if (!Defs.count(Reg))
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- }
- MBB->erase(MI);
-}
/// RemoveDeadStores - Scan through a basic block and look for loads followed
/// by stores. If they're both using the same stack slot, then the store is
@@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
-
+
MachineBasicBlock::iterator NextMI = llvm::next(I);
if (NextMI == MBB->end()) continue;
-
+
int FirstSS, SecondSS;
unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
-
+
++NumDead;
changed = true;
-
+
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
++NumDead;
toErase.push_back(I);
}
-
+
toErase.push_back(NextMI);
++I;
}
-
+
for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I)
(*I)->eraseFromParent();
-
+
return changed;
}
@@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Stack Slot Coloring **********\n"
- << "********** Function: "
+ << "********** Function: "
<< MF.getFunction()->getName() << '\n';
});
MFI = MF.getFrameInfo();
- MRI = &MF.getRegInfo();
TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
LS = &getAnalysis<LiveStacks>();
- VRM = &getAnalysis<VirtRegMap>();
loopInfo = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
unsigned NumSlots = LS->getNumIntervals();
- if (NumSlots < 2) {
- if (NumSlots == 0 || !VRM->HasUnusedRegisters())
- // Nothing to do!
- return false;
- }
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
// If there are calls to setjmp or sigsetjmp, don't perform stack slot
// coloring. The stack could be modified before the longjmp is executed,
// resulting in the wrong value being used afterwards. (See
// <rdar://problem/8007500>.)
- if (MF.callsSetJmp())
+ if (MF.exposesReturnsTwice())
return false;
// Gather spill slot references
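With createStackSlotColoringPass() and the register-coloring variant removed, the pass is now reached through the exported StackSlotColoringID. A minimal sketch of the intended usage, assuming the TargetPassConfig::addPass(char &ID) overload from elsewhere in this tree (the subclass and method names below are illustrative, not part of this patch):

  // Inside a hypothetical TargetPassConfig subclass:
  void MyPassConfig::addMachinePasses() {
    // was: PM->add(createStackSlotColoringPass(/*RegColor=*/false));
    addPass(StackSlotColoringID);  // request stack slot coloring by its pass ID
  }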
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 260cc0ee50a5..c6fdc7382435 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
return &MO;
}
}
- return NULL;
}
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
@@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
MachineOperand *LastUse = findLastUse(MBB, SrcReg);
assert(LastUse);
SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
- SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
LastUse->setIsKill(true);
}
- LI->renumber();
-
Allocator.Reset();
RegNodeMap.clear();
PHISrcDefs.clear();
@@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
// Set the phi-def flag for the VN at this PHI.
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
DestVNI->setIsPHIDef(true);
@@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
DestVNI->def = MBBStartIndex;
DestLI.addRange(LiveRange(MBBStartIndex,
- PHIIndex.getDefIndex(),
+ PHIIndex.getRegSlot(),
DestVNI));
return;
}
@@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
- CopyInstr,
LI->getVNInfoAllocator());
CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
- DestCopyIndex.getDefIndex(),
+ DestCopyIndex.getRegSlot(),
CopyVNI));
// Adjust DestReg's live interval to adjust for its new definition at
// CopyInstr.
LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+ DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
- VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
assert(DestVNI);
- DestVNI->def = DestCopyIndex.getDefIndex();
+ DestVNI->def = DestCopyIndex.getRegSlot();
InsertedDestCopies[CopyReg] = CopyInstr;
}
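The StrongPHIElimination hunks above track two LiveIntervals API updates: SlotIndex::getDefIndex() is now getRegSlot(), and LiveInterval::getNextValue() no longer takes the defining MachineInstr. A short sketch of the updated idiom (LIS and Interval are illustrative names, not from this file):

  SlotIndex Idx = LIS->getInstructionIndex(MI);           // LIS: LiveIntervals*
  VNInfo *VNI = Interval.getVNInfoAt(Idx.getRegSlot());   // was Idx.getDefIndex()
  VNInfo *NewVNI = Interval.getNextValue(Idx.getRegSlot(),
                                         LIS->getVNInfoAllocator());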
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3a6211a0f3e6..8ebfbcae785b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -56,10 +56,10 @@ typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
- bool PreRegAlloc;
const TargetInstrInfo *TII;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ bool PreRegAlloc;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
@@ -70,11 +70,10 @@ namespace {
public:
static char ID;
- explicit TailDuplicatePass(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ explicit TailDuplicatePass() :
+ MachineFunctionPass(ID), PreRegAlloc(false) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Tail Duplication"; }
private:
void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
@@ -118,14 +117,16 @@ namespace {
char TailDuplicatePass::ID = 0;
}
-FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
- return new TailDuplicatePass(PreRegAlloc);
-}
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
+ false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ PreRegAlloc = MRI->isSSA();
bool MadeChange = false;
while (TailDuplicateBlocks(MF))
@@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MO.setReg(VI->second);
}
}
- PredBB->insert(PredBB->end(), NewMI);
+ PredBB->insert(PredBB->instr_end(), NewMI);
}
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
@@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool HasIndirectbr = false;
if (!TailBB.empty())
- HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+ HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = 20;
@@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
- ++I) {
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable())
+ if (I->isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn())
+ if (PreRegAlloc && I->isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls presents a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->getDesc().isCall())
+ if (PreRegAlloc && I->isCall())
return false;
if (!I->isPHI() && !I->isDebugValue())
@@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
++I;
if (I == E)
return true;
- return I->getDesc().isUnconditionalBranch();
+ return I->isUnconditionalBranch();
}
static bool
@@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- while (I != TailBB->end()) {
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
MachineInstr *MI = &*I;
++I;
if (MI->isPHI()) {
@@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
- if (PrevBB->succ_size() == 1 &&
+ if (PrevBB->succ_size() == 1 &&
!TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
@@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
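The iterator change above matters once instructions may be bundled (e.g. ARM Thumb2 IT blocks): MachineBasicBlock::iterator walks only top-level instructions, while instr_iterator also visits instructions inside a bundle. A standalone sketch of the distinction (not code from this patch):

  for (MachineBasicBlock::instr_iterator I = TailBB->instr_begin(),
                                         E = TailBB->instr_end(); I != E; ++I) {
    MachineInstr *MI = &*I;
    if (MI->isBundle())
      continue;               // skip the bundle header itself
    // ... clone or otherwise process MI ...
  }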
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 122f8696e2ce..cadb87815dbe 100644
--- a/lib/Target/TargetFrameLowering.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,4 +1,4 @@
-//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index f32678f12b0a..2beb9281e35b 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
// If destination is tied to either of the commuted source register, then
@@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
Reg0 = Reg2;
+ SubReg0 = SubReg2;
} else if (HasDef && Reg0 == Reg2 &&
MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
Reg1IsKill = false;
Reg0 = Reg1;
+ SubReg0 = SubReg1;
}
if (NewMI) {
@@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MachineFunction &MF = *MI->getParent()->getParent();
if (HasDef)
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
else
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
}
- if (HasDef)
+ if (HasDef) {
MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
MI->getOperand(Idx2).setReg(Reg1);
MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
MI->getOperand(Idx1).setIsKill(Reg2IsKill);
return MI;
@@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
}
+bool
+TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
- assert(!Orig->getDesc().isNotDuplicable() &&
+ assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
}
@@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
+ NewMI->mayStore()) &&
"Folded a def to a non-store!");
assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
+ NewMI->mayLoad()) &&
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -332,7 +361,7 @@ MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
@@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetMachine &TM = MF.getTarget();
const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
@@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const MCInstrDesc &MCID = MI->getDesc();
-
// Avoid instructions obviously unsafe for remat.
- if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ if (MI->isNotDuplicable() || MI->mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI.def_empty(Reg))
- return false;
- BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
- if (AllocatableRegs.test(Reg))
+ if (!MRI.isConstantPhysReg(Reg, MF))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI.def_empty(AliasReg))
- return false;
- if (AllocatableRegs.test(AliasReg))
- return false;
- }
} else {
// A physreg def. We can't remat it.
return false;
@@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const{
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return (ScheduleHazardRecognizer *)
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+
+int
+TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
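The new default implementations above let SelectionDAG scheduling fall back to the instruction itineraries when a target does not override the latency queries. A sketch of how a caller might combine them, assuming a TargetInstrInfo reference TII and an itinerary obtained from the TargetMachine (variable names are illustrative):

  const InstrItineraryData *Itin = TM.getInstrItineraryData();
  int Lat = TII.getOperandLatency(Itin, DefNode, DefOpIdx, UseNode, UseOpIdx);
  if (Lat < 0)                                // unknown operand latency
    Lat = TII.getInstrLatency(Itin, DefNode); // whole-instruction fallback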
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3848f4d4d4c4..9925185be120 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
return Mang->getSymbol(GV);
- break;
case dwarf::DW_EH_PE_pcrel: {
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
Mang->getSymbol(GV)->getName());
- break;
}
}
}
@@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
+ unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(8);
+ Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
- const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
Streamer.EmitLabel(Label);
- unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.EmitSymbolValue(Sym, Size);
}
@@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
static const char *getSectionPrefixForGlobal(SectionKind Kind) {
if (Kind.isText()) return ".text.";
if (Kind.isReadOnly()) return ".rodata.";
+ if (Kind.isBSS()) return ".bss.";
if (Kind.isThreadData()) return ".tdata.";
if (Kind.isThreadBSS()) return ".tbss.";
@@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !Kind.isCommon() && !Kind.isBSS()) {
+ !Kind.isCommon()) {
const char *Prefix;
Prefix = getSectionPrefixForGlobal(Kind);
@@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+const MCSection *
+TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
//===----------------------------------------------------------------------===//
// MachO
//===----------------------------------------------------------------------===//
+/// emitModuleFlags - Emit the module flags that specify the garbage collection
+/// information.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned GCFlags = 0;
+ StringRef SectionVal;
+
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version")
+ VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only")
+ GCFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Image Info Section")
+ SectionVal = cast<MDString>(Val)->getString();
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind::getDataNoRel());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(GCFlags, 4);
+ Streamer.AddBlankLine();
+}
+
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' has an invalid section specifier '" + GV->getSection()+
- "': " + ErrorCode + ".");
- // Fall back to dropping it into the data section.
- return DataSection;
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
}
// Get the section.
@@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' section type or attributes does not match previous"
- " section specifier");
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
}
return S;
@@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
@@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
else if (K.isReadOnly())
Flags |=
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
+ if (Kind.isThreadLocal())
+ return ".tls$";
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
@@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
const MCSection *TargetLoweringObjectFileCOFF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isText())
return getTextSection();
+ if (Kind.isThreadLocal())
+ return getTLSDataSection();
+
return getDataSection();
}
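With the TLS support added above, a thread-local global on COFF now selects the .tls$ data section instead of tripping the old "Doesn't support TLS" assertion. A minimal query sketch (TLOF, GV, Mang, and TM stand in for the usual lowering objects and are not defined in this hunk):

  SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
  const MCSection *S = TLOF.SelectSectionForGlobal(GV, Kind, Mang, TM);
  // Kind.isThreadLocal() now yields the TLS data section on COFF targets.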
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..0f59d0169e18
--- /dev/null
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (default), the
+/// code generator is not allowed to generate mad (multiply-add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
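The helpers in this new file centralize option queries that code generation consults per function. A usage sketch, assuming the public TargetOptions member on TargetMachine introduced alongside this refactoring (that member is not visible in this hunk):

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetOptions.h"
  using namespace llvm;

  static bool mustKeepFramePointer(const MachineFunction &MF) {
    const TargetOptions &Opts = MF.getTarget().Options;
    // True either globally, or for non-leaf functions when only
    // non-leaf frame-pointer elimination is disabled.
    return Opts.DisableFramePointerElim(MF);
  }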
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index d87937822280..c30b1333bb2a 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumDeletes, "Number of dead instructions deleted");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
// DistanceMap - Keep track the distance of a MI from the start of the
// current basic block.
@@ -120,6 +125,18 @@ namespace {
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi, unsigned Dist);
+ bool isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
@@ -152,7 +169,6 @@ namespace {
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->getDesc().isTerminator())
+ KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
// position and the kill use, then it's not safe to sink it.
- //
+ //
// FIXME: This can be sped up if there is an easy way to query whether an
// instruction is before or after another instruction. Then we can use
// MachineRegisterInfo def / use instead.
@@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
KillMO->setIsKill(false);
KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
KillMO->setIsKill(true);
-
+
if (LV)
LV->replaceKillInstruction(SavedReg, KillMI, MI);
@@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
continue; // Current use.
OtherUse = true;
// There is at least one other use in the MBB that will clobber the
- // register.
+ // register.
if (isTwoAddrUse(UseMI, Reg))
return true;
}
@@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
+/// findLocalKill - Look for an instruction below MI in the MBB that kills the
+/// specified register. Returns null if there is any other use of Reg between
+/// the two instructions.
+static
+MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
+ MachineInstr *MI, MachineRegisterInfo *MRI,
+ DenseMap<MachineInstr*, unsigned> &DistanceMap) {
+ MachineInstr *KillMI = 0;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI || UseMI->getParent() != MBB)
+ continue;
+ if (DistanceMap.count(UseMI))
+ continue;
+ if (!UI.getOperand().isKill())
+ return 0;
+ if (KillMI)
+ return 0; // -O0 kill markers cannot be trusted?
+ KillMI = UseMI;
+ }
+
+ return KillMI;
+}
+
/// findOnlyInterestingUse - Given a register, if has a single in-basic block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -528,6 +570,9 @@ bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
// Determine if it's profitable to commute this two address instruction. In
// general, we want no uses between this instruction and the definition of
// the two-address register.
@@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
// %reg1029<def> = MOV8rr %reg1028
// %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
// insert => %reg1030<def> = MOV8rr %reg1029
- // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
if (!MI->killsRegister(regC))
return false;
@@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall())
+ if (MI->mayStore() || MI->isCall())
return false;
- if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
return true;
}
+/// RescheduleMIBelowKill - If there is one other local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool
+TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator From = MI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
+ Defs.insert(To->getOperand(0).getReg());
+ ++To;
+ }
+
+ // Check that rescheduling will not break any dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's ok if the instruction
+ // is sunk completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ if (MOReg != Reg &&
+ ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
+ --From;
+
+ // Copies following MI may have been moved as well.
+ nmi = To;
+ MBB->splice(KillPos, MBB, From, To);
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+
+ return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DefMI = &*DI;
+ if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+ continue;
+ if (DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// RescheduleKillAboveMI - If there is one other local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool
+TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI, MBB))
+ return false;
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+ // Check that rescheduling will not break any dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+ // Move the old kill above MI, don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (llvm::prior(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+ return true;
+}
+
/// TryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if the tied operands
-/// are eliminated altogether.
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled, and will be visited again later).
bool TwoAddressInstructionPass::
TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
SmallPtrSet<MachineInstr*, 8> &Processed) {
- const MCInstrDesc &MCID = mi->getDesc();
- unsigned regA = mi->getOperand(DstIdx).getReg();
- unsigned regB = mi->getOperand(SrcIdx).getReg();
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
// If regA is dead and the instruction can be deleted, just delete
// it so it doesn't clobber regB.
- bool regBKilled = isKilled(*mi, regB, MRI, TII);
- if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ bool regBKilled = isKilled(MI, regB, MRI, TII);
+ if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
++NumDeletes;
return true; // Done with this instruction.
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
- TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
else if (SrcIdx == SrcOp2)
regCIdx = SrcOp1;
if (regCIdx != ~0U) {
- regC = mi->getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ regC = MI.getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(MI, regC, MRI, TII))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling this MI below it.
+ if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (MCID.isConvertibleTo3Addr()) {
+ if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling it before this MI if it's legal.
+ if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
// If this is an instruction with a load folded into it, try unfolding
// the load, e.g. avoid this:
// movq %rdx, %rcx
@@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
 // because it's preferable to schedule a load rather than a register copy.
- if (MCID.mayLoad() && !regBKilled) {
+ if (MI.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
- TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
/*UnfoldLoad=*/true,
/*UnfoldStore=*/false,
&LoadRegIndex);
@@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineFunction &MF = *mbbi->getParent();
// Unfold the load.
- DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() &&
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
else {
assert(NewMIs[1]->killsRegister(MO.getReg()) &&
"Kill missing after load unfold!");
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
}
}
- } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
if (NewMIs[1]->registerDefIsDead(MO.getReg()))
LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
else {
@@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
}
- mi->eraseFromParent();
+ MI.eraseFromParent();
mi = NewMIs[1];
if (TransformSuccess)
return true;
@@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "Machine Function\n");
const TargetMachine &TM = MF.getTarget();
MRI = &MF.getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ InstrItins = TM.getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
AA = &getAnalysis<AliasAnalysis>();
+ OptLevel = TM.getOptLevel();
bool MadeChange = false;
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
- DEBUG(dbgs() << "********** Function: "
+ DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
@@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
- DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
@@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
- }
- // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
- if (mi->isInsertSubreg()) {
- // From %reg = INSERT_SUBREG %reg, %subreg, subidx
- // To %reg:subidx = COPY %subreg
- unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
- assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
- mi->getOperand(0).setSubReg(SubIdx);
- mi->RemoveOperand(1);
- mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
}
// Clear TiedOperands here instead of at the top of the loop
@@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg,
}
}
+// Find the first def of Reg, assuming they are all in the same basic block.
+static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
+ SmallPtrSet<MachineInstr*, 8> Defs;
+ MachineInstr *First = 0;
+ for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
+ First = MI;
+ if (!First)
+ return 0;
+
+ MachineBasicBlock *MBB = First->getParent();
+ MachineBasicBlock::iterator A = First, B = First;
+ bool Moving;
+ do {
+ Moving = false;
+ if (A != MBB->begin()) {
+ Moving = true;
+ --A;
+ if (Defs.erase(A)) First = A;
+ }
+ if (B != MBB->end()) {
+ Defs.erase(B);
+ ++B;
+ Moving = true;
+ }
+ } while (Moving && !Defs.empty());
+ assert(Defs.empty() && "Instructions outside basic block!");
+ return First;
+}
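
findFirstDef above collects every def of Reg into a set and then grows a window outward from one arbitrary def until all of them have been accounted for, remembering the earliest one it sees. The sketch below reproduces that two-pointer expansion over a plain vector of instruction ids; it is illustrative only, assumes every id in DefIds occurs in Block, and uses hypothetical names rather than the LLVM API.

#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

// Returns the index of the earliest element of Block that is in DefIds.
std::size_t findFirstDefIndex(const std::vector<int> &Block,
                              std::set<int> DefIds) {
  assert(!DefIds.empty());
  // Start from some def; here, the first one a forward scan happens to hit.
  std::size_t Start = 0;
  while (!DefIds.count(Block[Start]))
    ++Start;
  DefIds.erase(Block[Start]);
  std::size_t First = Start, A = Start, B = Start;
  bool Moving;
  do {
    Moving = false;
    if (A != 0) {                    // expand upward; an earlier def wins
      --A;
      Moving = true;
      if (DefIds.erase(Block[A]))
        First = A;
    }
    if (B + 1 != Block.size()) {     // expand downward; just tick defs off
      ++B;
      Moving = true;
      DefIds.erase(Block[B]);
    }
  } while (Moving && !DefIds.empty());
  assert(DefIds.empty() && "defs outside the block");
  return First;
}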
+
/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
/// EXTRACT_SUBREG from the same register and to the same virtual register
/// with different sub-register indices, attempt to combine the
@@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
CanCoalesce = false;
break;
}
- // Keep track of one of the uses.
- SomeMI = UseMI;
+ // Keep track of one of the uses, preferring the first one that has a
+ // <def,undef> flag.
+ if (!SomeMI || UseMI->getOperand(0).isUndef())
+ SomeMI = UseMI;
}
if (!CanCoalesce)
continue;
@@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(DstReg, RegState::Define |
+ getUndefRegState(SomeMI->getOperand(0).isUndef()),
+ NewDstSubIdx)
.addReg(SrcReg, 0, NewSrcSubIdx);
// Remove all the old extract instructions.
@@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
unsigned SubIdx = MI->getOperand(i+1).getImm();
- if (MI->getOperand(i).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
+ // A DefMI of NULL means the value does not have a vreg in this block,
+ // i.e., it's a physical register or a subreg.
+ // In either case we force a copy to be generated.
+ MachineInstr *DefMI = NULL;
+ if (!MI->getOperand(i).getSubReg() &&
+ !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
}
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI->isImplicitDef()) {
+ if (DefMI && DefMI->isImplicitDef()) {
DefMI->eraseFromParent();
continue;
}
IsImpDef = false;
// Remember COPY sources. These might be candidate for coalescing.
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+ if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
bool isKill = MI->getOperand(i).isKill();
- if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ if (!DefMI || !Seen.insert(SrcReg) ||
+ MI->getParent() != DefMI->getParent() ||
!isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
!TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
MRI->getRegClass(SrcReg), SubIdx)) {
@@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
MI->getOperand(i).setReg(0);
- if (LV && isKill)
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, CopyMI);
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
@@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
}
+ // Set <def,undef> flags on the first DstReg def in the basic block.
+ // It marks the beginning of the live range. All the other defs are
+ // read-modify-write.
+ if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = Def->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ MO.setIsUndef();
+ }
+ // Make sure there is a full non-subreg imp-def operand on the
+ // instruction. This shouldn't be necessary, but it seems that at least
+ // RAFast requires it.
+ Def->addRegisterDefined(DstReg, TRI);
+ DEBUG(dbgs() << "First def: " << *Def);
+ }
+
if (IsImpDef) {
DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
+ MI->RemoveOperand(j);
} else {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 8a1cdc01c494..3bab93bdc098 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -16,10 +16,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "virtregmap"
+#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,12 +31,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include <algorithm>
using namespace llvm;
@@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
- ReMatId = MAX_STACK_SLOT+1;
- LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
-
Virt2PhysMap.clear();
Virt2StackSlotMap.clear();
- Virt2ReMatIdMap.clear();
Virt2SplitMap.clear();
- Virt2SplitKillMap.clear();
- ReMatMap.clear();
- ImplicitDefed.clear();
- SpillSlotToUsesMap.clear();
- MI2VirtMap.clear();
- SpillPt2VirtMap.clear();
- RestorePt2VirtMap.clear();
- EmergencySpillMap.clear();
- EmergencySpillSlots.clear();
-
- SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
-
- allocatableRCRegs.clear();
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- allocatableRCRegs.insert(std::make_pair(*I,
- TRI->getAllocatableSet(mf, *I)));
grow();
-
return false;
}
@@ -93,24 +65,12 @@ void VirtRegMap::grow() {
unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
Virt2PhysMap.resize(NumRegs);
Virt2StackSlotMap.resize(NumRegs);
- Virt2ReMatIdMap.resize(NumRegs);
Virt2SplitMap.resize(NumRegs);
- Virt2SplitKillMap.resize(NumRegs);
- ReMatMap.resize(NumRegs);
- ImplicitDefed.resize(NumRegs);
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- assert(SS >= LowSpillSlot && "Unexpected low spill slot");
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
++NumSpillSlots;
return SS;
}
@@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
Virt2StackSlotMap[virtReg] = SS;
}
-int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = ReMatId;
- return ReMatId++;
-}
-
-void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = id;
-}
-
-int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
- std::map<const TargetRegisterClass*, int>::iterator I =
- EmergencySpillSlots.find(RC);
- if (I != EmergencySpillSlots.end())
- return I->second;
- return EmergencySpillSlots[RC] = createSpillSlot(RC);
-}
-
-void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
- if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
- // If FI < LowSpillSlot, this stack reference was produced by
- // instruction selection and is not a spill
- if (FI >= LowSpillSlot) {
- assert(FI >= 0 && "Spill slot index should not be negative!");
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
- }
- }
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
- MachineInstr *NewMI, ModRef MRInfo) {
- // Move previous memory references folded to new instruction.
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
- for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
- E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
- MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
- MI2VirtMap.erase(I++);
- }
-
- // add new memory reference
- MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
- MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isFI())
- continue;
- int FI = MO.getIndex();
- if (MF->getFrameInfo()->isFixedObjectIndex(FI))
- continue;
- // This stack reference was produced by instruction selection and
- // is not a spill
- if (FI < LowSpillSlot)
- continue;
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
- }
- MI2VirtMap.erase(MI);
- SpillPt2VirtMap.erase(MI);
- RestorePt2VirtMap.erase(MI);
- EmergencySpillMap.erase(MI);
-}
-
-/// FindUnusedRegisters - Gather a list of allocatable registers that
-/// have not been allocated to any virtual register.
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
- unsigned NumRegs = TRI->getNumRegs();
- UnusedRegs.reset();
- UnusedRegs.resize(NumRegs);
-
- BitVector Used(NumRegs);
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[Reg]);
- }
-
- BitVector Allocatable = TRI->getAllocatableSet(*MF);
- bool AnyUnused = false;
- for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
- if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
- bool ReallyUnused = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- if (Used[*AS] || LIs->hasInterval(*AS)) {
- ReallyUnused = false;
- break;
- }
- }
- if (ReallyUnused) {
- AnyUnused = true;
- UnusedRegs.set(Reg);
- }
- }
- }
-
- return AnyUnused;
-}
-
void VirtRegMap::rewrite(SlotIndexes *Indexes) {
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
@@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
+#ifndef NDEBUG
+ BitVector Reserved = TRI->getReservedRegs(*MF);
+#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
- MII != MIE;) {
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
unsigned PhysReg = getPhys(VirtReg);
assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+ assert(!Reserved.test(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
@@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
++NumIdCopies;
if (MI->getNumOperands() == 2) {
DEBUG(dbgs() << "Deleting identity copy.\n");
- RemoveMachineInstrFromMaps(MI);
if (Indexes)
Indexes->removeMachineInstrFromMaps(MI);
// It's safe to erase MI because MII has already been incremented.
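
The rewrite loop in this hunk replaces each virtual-register operand with its assigned physical register and then erases copies that have become identity copies. The following is a minimal standalone sketch of that shape over a toy list of copies; the Copy type and integer register ids are hypothetical, not the pass itself.

#include <algorithm>
#include <unordered_map>
#include <vector>

struct Copy { int Dst, Src; };   // a toy COPY pseudo: Dst = Src

void rewriteCopies(std::vector<Copy> &Copies,
                   const std::unordered_map<int, int> &VirtToPhys) {
  for (Copy &C : Copies) {
    // Map virtual registers to their assigned physical registers.
    if (auto It = VirtToPhys.find(C.Dst); It != VirtToPhys.end())
      C.Dst = It->second;
    if (auto It = VirtToPhys.find(C.Src); It != VirtToPhys.end())
      C.Src = It->second;
  }
  // Identity copies carry no information once both sides are physical.
  Copies.erase(std::remove_if(Copies.begin(), Copies.end(),
                              [](const Copy &C) { return C.Dst == C.Src; }),
               Copies.end());
}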
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 03abff356934..8cac31137e3d 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -18,22 +18,14 @@
#define LLVM_CODEGEN_VIRTREGMAP_H
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <map>
namespace llvm {
- class LiveIntervals;
class MachineInstr;
class MachineFunction;
class MachineRegisterInfo;
class TargetInstrInfo;
- class TargetRegisterInfo;
class raw_ostream;
class SlotIndexes;
@@ -45,18 +37,12 @@ namespace llvm {
MAX_STACK_SLOT = (1L << 18)-1
};
- enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
- typedef std::multimap<MachineInstr*,
- std::pair<unsigned, ModRef> > MI2VirtMapTy;
-
private:
MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineFunction *MF;
- DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
-
/// Virt2PhysMap - This is a virtual to physical register
/// mapping. Each virtual register is required to have an entry in
/// it; even spilled virtual registers (the register mapped to a
@@ -70,71 +56,10 @@ namespace llvm {
/// at.
IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
- /// Virt2ReMatIdMap - This is virtual register to rematerialization id
- /// mapping. Each spilled virtual register that should be remat'd has an
- /// entry in it which corresponds to the remat id.
- IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
-
 /// Virt2SplitMap - This is the virtual register to split virtual register
/// mapping.
IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
- /// Virt2SplitKillMap - This is splitted virtual register to its last use
- /// (kill) index mapping.
- IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
-
- /// ReMatMap - This is virtual register to re-materialized instruction
- /// mapping. Each virtual register whose definition is going to be
- /// re-materialized has an entry in it.
- IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
-
- /// MI2VirtMap - This is MachineInstr to virtual register
- /// mapping. In the case of memory spill code being folded into
- /// instructions, we need to know which virtual register was
- /// read/written by this instruction.
- MI2VirtMapTy MI2VirtMap;
-
- /// SpillPt2VirtMap - This records the virtual registers which should
- /// be spilled right after the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
- SpillPt2VirtMap;
-
- /// RestorePt2VirtMap - This records the virtual registers which should
- /// be restored right before the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
-
- /// EmergencySpillMap - This records the physical registers that should
- /// be spilled / restored around the MachineInstr since the register
- /// allocator has run out of registers.
- std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
-
- /// EmergencySpillSlots - This records emergency spill slots used to
- /// spill physical registers when the register allocator runs out of
- /// registers. Ideally only one stack slot is used per function per
- /// register class.
- std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
-
- /// ReMatId - Instead of assigning a stack slot to a to be rematerialized
- /// virtual register, an unique id is being assigned. This keeps track of
- /// the highest id used so far. Note, this starts at (1<<18) to avoid
- /// conflicts with stack slot numbers.
- int ReMatId;
-
- /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
- int LowSpillSlot, HighSpillSlot;
-
- /// SpillSlotToUsesMap - Records uses for each register spill slot.
- SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
-
- /// ImplicitDefed - One bit for each virtual register. If set it indicates
- /// the register is implicitly defined.
- BitVector ImplicitDefed;
-
- /// UnusedRegs - A list of physical registers that have not been used.
- BitVector UnusedRegs;
-
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
@@ -144,11 +69,7 @@ namespace llvm {
public:
static char ID;
VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT),
- Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
- Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
- ReMatId(MAX_STACK_SLOT+1),
- LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -235,8 +156,7 @@ namespace llvm {
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
- if (getStackSlot(virtReg) == NO_STACK_SLOT &&
- getReMatId(virtReg) == NO_STACK_SLOT)
+ if (getStackSlot(virtReg) == NO_STACK_SLOT)
return true;
// Split register can be assigned a physical register as well as a
// stack slot or remat id.
@@ -250,13 +170,6 @@ namespace llvm {
return Virt2StackSlotMap[virtReg];
}
- /// @brief returns the rematerialization id mapped to the specified virtual
- /// register
- int getReMatId(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2ReMatIdMap[virtReg];
- }
-
 /// @brief create a mapping for the specified virtual register to
/// the next available stack slot
int assignVirt2StackSlot(unsigned virtReg);
@@ -264,250 +177,6 @@ namespace llvm {
/// the specified stack slot
void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- int assignVirtReMatId(unsigned virtReg);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- void assignVirtReMatId(unsigned virtReg, int id);
-
- /// @brief returns true if the specified virtual register is being
- /// re-materialized.
- bool isReMaterialized(unsigned virtReg) const {
- return ReMatMap[virtReg] != NULL;
- }
-
- /// @brief returns the original machine instruction being re-issued
- /// to re-materialize the specified virtual register.
- MachineInstr *getReMaterializedMI(unsigned virtReg) const {
- return ReMatMap[virtReg];
- }
-
- /// @brief records the specified virtual register will be
- /// re-materialized and the original instruction which will be re-issed
- /// for this purpose. If parameter all is true, then all uses of the
- /// registers are rematerialized and it's safe to delete the definition.
- void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
- ReMatMap[virtReg] = def;
- }
-
- /// @brief record the last use (kill) of a split virtual register.
- void addKillPoint(unsigned virtReg, SlotIndex index) {
- Virt2SplitKillMap[virtReg] = index;
- }
-
- SlotIndex getKillPoint(unsigned virtReg) const {
- return Virt2SplitKillMap[virtReg];
- }
-
- /// @brief remove the last use (kill) of a split virtual register.
- void removeKillPoint(unsigned virtReg) {
- Virt2SplitKillMap[virtReg] = SlotIndex();
- }
-
- /// @brief returns true if the specified MachineInstr is a spill point.
- bool isSpillPt(MachineInstr *Pt) const {
- return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be spilled due to
- /// splitting right after the specified MachineInstr.
- std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
- return SpillPt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a spill point for virtReg.
- void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Pt);
- if (I != SpillPt2VirtMap.end())
- I->second.push_back(std::make_pair(virtReg, isKill));
- else {
- std::vector<std::pair<unsigned,bool> > Virts;
- Virts.push_back(std::make_pair(virtReg, isKill));
- SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer spill point information from one instruction to
- /// another.
- void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Old);
- if (I == SpillPt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back().first;
- bool isKill = I->second.back().second;
- I->second.pop_back();
- addSpillPoint(virtReg, isKill, New);
- }
- SpillPt2VirtMap.erase(I);
- }
-
- /// @brief returns true if the specified MachineInstr is a restore point.
- bool isRestorePt(MachineInstr *Pt) const {
- return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be restoreed due to
- /// splitting right after the specified MachineInstr.
- std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
- return RestorePt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a restore point for virtReg.
- void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Pt);
- if (I != RestorePt2VirtMap.end())
- I->second.push_back(virtReg);
- else {
- std::vector<unsigned> Virts;
- Virts.push_back(virtReg);
- RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer restore point information from one instruction to
- /// another.
- void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Old);
- if (I == RestorePt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addRestorePoint(virtReg, New);
- }
- RestorePt2VirtMap.erase(I);
- }
-
- /// @brief records that the specified physical register must be spilled
- /// around the specified machine instr.
- void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
- if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
- EmergencySpillMap[MI].push_back(PhysReg);
- else {
- std::vector<unsigned> PhysRegs;
- PhysRegs.push_back(PhysReg);
- EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
- }
- }
-
- /// @brief returns true if one or more physical registers must be spilled
- /// around the specified instruction.
- bool hasEmergencySpills(MachineInstr *MI) const {
- return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
- }
-
- /// @brief returns the physical registers to be spilled and restored around
- /// the instruction.
- std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
- return EmergencySpillMap[MI];
- }
-
- /// @brief - transfer emergency spill information from one instruction to
- /// another.
- void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
- EmergencySpillMap.find(Old);
- if (I == EmergencySpillMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addEmergencySpill(virtReg, New);
- }
- EmergencySpillMap.erase(I);
- }
-
- /// @brief return or get a emergency spill slot for the register class.
- int getEmergencySpillSlot(const TargetRegisterClass *RC);
-
- /// @brief Return lowest spill slot index.
- int getLowSpillSlot() const {
- return LowSpillSlot;
- }
-
- /// @brief Return highest spill slot index.
- int getHighSpillSlot() const {
- return HighSpillSlot;
- }
-
- /// @brief Records a spill slot use.
- void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
-
- /// @brief Returns true if spill slot has been used.
- bool isSpillSlotUsed(int FrameIndex) const {
- assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
- return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
- }
-
- /// @brief Mark the specified register as being implicitly defined.
- void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
- }
-
- /// @brief Returns true if the virtual register is implicitly defined.
- bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
- }
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into newMI machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
- ModRef MRInfo);
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into the specified machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
-
- /// @brief returns the virtual registers' values folded in memory
- /// operands of this instruction
- std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
- getFoldedVirts(MachineInstr* MI) const {
- return MI2VirtMap.equal_range(MI);
- }
-
- /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
- /// the folded instruction map and spill point map.
- void RemoveMachineInstrFromMaps(MachineInstr *MI);
-
- /// FindUnusedRegisters - Gather a list of allocatable registers that
- /// have not been allocated to any virtual register.
- bool FindUnusedRegisters(LiveIntervals* LIs);
-
- /// HasUnusedRegisters - Return true if there are any allocatable registers
- /// that have not been allocated to any virtual register.
- bool HasUnusedRegisters() const {
- return !UnusedRegs.none();
- }
-
- /// setRegisterUsed - Remember the physical register is now used.
- void setRegisterUsed(unsigned Reg) {
- UnusedRegs.reset(Reg);
- }
-
- /// isRegisterUnused - Return true if the physical register has not been
- /// used.
- bool isRegisterUnused(unsigned Reg) const {
- return UnusedRegs[Reg];
- }
-
- /// getFirstUnusedRegister - Return the first physical register that has not
- /// been used.
- unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
- int Reg = UnusedRegs.find_first();
- while (Reg != -1) {
- if (allocatableRCRegs[RC][Reg])
- return (unsigned)Reg;
- Reg = UnusedRegs.find_next(Reg);
- }
- return 0;
- }
-
/// rewrite - Rewrite all instructions in MF to use only physical registers
/// by mapping all virtual register operands to their assigned physical
/// registers.
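
After this cleanup the map keeps little more than a physical-register assignment and an optional stack slot per virtual register. The toy class below sketches that reduced shape under the assumption of integer register ids and sentinel "unassigned" values; the names are hypothetical and it is not the real VirtRegMap interface.

#include <cassert>
#include <vector>

class ToyVirtRegMap {
  static constexpr int NoPhysReg = 0;
  static constexpr int NoStackSlot = -1;
  std::vector<int> Virt2Phys, Virt2Slot;
public:
  void grow(unsigned NumVirtRegs) {
    Virt2Phys.resize(NumVirtRegs, NoPhysReg);
    Virt2Slot.resize(NumVirtRegs, NoStackSlot);
  }
  void assignPhys(unsigned VReg, int PReg) {
    assert(Virt2Phys[VReg] == NoPhysReg && "already mapped");
    Virt2Phys[VReg] = PReg;
  }
  int getPhys(unsigned VReg) const { return Virt2Phys[VReg]; }
  void assignStackSlot(unsigned VReg, int Slot) { Virt2Slot[VReg] = Slot; }
  // No spill slot recorded means the value lives only in a register.
  bool isAssignedReg(unsigned VReg) const {
    return Virt2Slot[VReg] == NoStackSlot;
  }
};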
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
deleted file mode 100644
index a5ec797b27db..000000000000
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ /dev/null
@@ -1,2633 +0,0 @@
-//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "virtregrewriter"
-#include "VirtRegRewriter.h"
-#include "VirtRegMap.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumDSE , "Number of dead stores elided");
-STATISTIC(NumDSS , "Number of dead spill slots removed");
-STATISTIC(NumCommutes, "Number of instructions commuted");
-STATISTIC(NumDRM , "Number of re-materializable defs elided");
-STATISTIC(NumStores , "Number of stores added");
-STATISTIC(NumPSpills , "Number of physical register spills");
-STATISTIC(NumOmitted , "Number of reloads omitted");
-STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
-STATISTIC(NumCopified, "Number of available reloads turned into copies");
-STATISTIC(NumReMats , "Number of re-materialization");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused , "Number of values reused");
-STATISTIC(NumDCE , "Number of copies elided");
-STATISTIC(NumSUnfold , "Number of stores unfolded");
-STATISTIC(NumModRefUnfold, "Number of modref unfolded");
-
-namespace {
- enum RewriterName { local, trivial };
-}
-
-static cl::opt<RewriterName>
-RewriterOpt("rewriter",
- cl::desc("Rewriter to use (default=local)"),
- cl::Prefix,
- cl::values(clEnumVal(local, "local rewriter"),
- clEnumVal(trivial, "trivial rewriter"),
- clEnumValEnd),
- cl::init(local));
-
-static cl::opt<bool>
-ScheduleSpills("schedule-spills",
- cl::desc("Schedule spill code"),
- cl::init(false));
-
-VirtRegRewriter::~VirtRegRewriter() {}
-
-/// substitutePhysReg - Replace virtual register in MachineOperand with a
-/// physical register. Do the right thing with the sub-register index.
-/// Note that operands may be added, so the MO reference is no longer valid.
-static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
- const TargetRegisterInfo &TRI) {
- if (MO.getSubReg()) {
- MO.substPhysReg(Reg, TRI);
-
- // Any kill flags apply to the full virtual register, so they also apply to
- // the full physical register.
- // We assume that partial defs have already been decorated with a super-reg
- // <imp-def> operand by LiveIntervals.
- MachineInstr &MI = *MO.getParent();
- if (MO.isUse() && !MO.isUndef() &&
- (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
- MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
- } else {
- MO.setReg(Reg);
- }
-}
-
-namespace {
-
-/// This class is intended for use with the new spilling framework only. It
-/// rewrites vreg def/uses to use the assigned preg, but does not insert any
-/// spill code.
-struct TrivialRewriter : public VirtRegRewriter {
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) {
- DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
- DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
- DEBUG(dbgs() << "**** Machine Instrs"
- << "(NOTE! Does not include spills and reloads!) ****\n");
- DEBUG(MF.dump());
-
- MachineRegisterInfo *mri = &MF.getRegInfo();
- const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
-
- bool changed = false;
-
- for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = liItr->second;
- unsigned reg = li->reg;
-
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- if (!li->empty())
- mri->setPhysRegUsed(reg);
- }
- else {
- if (!VRM.hasPhys(reg))
- continue;
- unsigned pReg = VRM.getPhys(reg);
- mri->setPhysRegUsed(pReg);
- // Copy the register use-list before traversing it.
- SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
- for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
- E = mri->reg_end(); I != E; ++I)
- reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
- for (unsigned N=0; N != reglist.size(); ++N)
- substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
- pReg, *tri);
- changed |= !reglist.empty();
- }
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
-
- return changed;
- }
-
-};
-
-}
-
-// ************************************************************************ //
-
-namespace {
-
-/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
-/// from top down, keep track of which spill slots or remat are available in
-/// each register.
-///
-/// Note that not all physregs are created equal here. In particular, some
-/// physregs are reloads that we are allowed to clobber or ignore at any time.
-/// Other physregs are values that the register allocated program is using
-/// that we cannot CHANGE, but we can read if we like. We keep track of this
-/// on a per-stack-slot / remat id basis as the low bit in the value of the
-/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
-/// this bit and addAvailable sets it if.
-class AvailableSpills {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
-
- // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
- // or remat'ed virtual register values that are still available, due to
- // being loaded or stored to, but not invalidated yet.
- std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
-
- // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
- // indicating which stack slot values are currently held by a physreg. This
- // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
- // physreg is modified.
- std::multimap<unsigned, int> PhysRegsAvailable;
-
- void disallowClobberPhysRegOnly(unsigned PhysReg);
-
- void ClobberPhysRegOnly(unsigned PhysReg);
-public:
- AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
- : TRI(tri), TII(tii) {
- }
-
- /// clear - Reset the state.
- void clear() {
- SpillSlotsOrReMatsAvailable.clear();
- PhysRegsAvailable.clear();
- }
-
- const TargetRegisterInfo *getRegInfo() const { return TRI; }
-
- /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
- /// available in a physical register, return that PhysReg, otherwise
- /// return 0.
- unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
- std::map<int, unsigned>::const_iterator I =
- SpillSlotsOrReMatsAvailable.find(Slot);
- if (I != SpillSlotsOrReMatsAvailable.end()) {
- return I->second >> 1; // Remove the CanClobber bit.
- }
- return 0;
- }
-
- /// addAvailable - Mark that the specified stack slot / remat is available
- /// in the specified physreg. If CanClobber is true, the physreg can be
- /// modified at any time without changing the semantics of the program.
- void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
- // If this stack slot is thought to be available in some other physreg,
- // remove its record.
- ModifyStackSlotOrReMat(SlotOrReMat);
-
- PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
- SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
- (unsigned)CanClobber;
-
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Remembering RM#"
- << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
- << (CanClobber ? " canclobber" : "") << "\n");
- }
-
- /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
- /// the value of the specified stackslot register if it desires. The
- /// specified stack slot must be available in a physreg for this query to
- /// make sense.
- bool canClobberPhysRegForSS(int SlotOrReMat) const {
- assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
- "Value not available!");
- return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
- }
-
- /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
- /// physical register where values for some stack slot(s) might be
- /// available.
- bool canClobberPhysReg(unsigned PhysReg) const {
- std::multimap<unsigned, int>::const_iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- if (!canClobberPhysRegForSS(SlotOrReMat))
- return false;
- }
- return true;
- }
-
- /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
- /// stackslot register. The register is still available but is no longer
- /// allowed to be modifed.
- void disallowClobberPhysReg(unsigned PhysReg);
-
- /// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff that lives in
- /// it and any of its aliases.
- void ClobberPhysReg(unsigned PhysReg);
-
- /// ModifyStackSlotOrReMat - This method is called when the value in a stack
- /// slot changes. This removes information about which register the
- /// previous value for this slot lives in (as the previous value is dead
- /// now).
- void ModifyStackSlotOrReMat(int SlotOrReMat);
-
- /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
- /// other stack slots sharing the same register are no longer valid.
- void ClobberSharingStackSlots(int StackSlot);
-
- /// AddAvailableRegsToLiveIn - Availability information is being kept coming
- /// into the specified MBB. Add available physical registers as potential
- /// live-in's. If they are reused in the MBB, they will be added to the
- /// live-in set to make register scavenger and post-allocation scheduler.
- void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-
-}
-
-// ************************************************************************ //
-
-// Given a location where a reload of a spilled register or a remat of
-// a constant is to be inserted, attempt to find a safe location to
-// insert the load at an earlier point in the basic-block, to hide
-// latency of the load and to avoid address-generation interlock
-// issues.
-static MachineBasicBlock::iterator
-ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
- MachineBasicBlock::iterator const Begin,
- unsigned PhysReg,
- const TargetRegisterInfo *TRI,
- bool DoReMat,
- int SSorRMId,
- const TargetInstrInfo *TII,
- const MachineFunction &MF)
-{
- if (!ScheduleSpills)
- return InsertLoc;
-
- // Spill backscheduling is of primary interest to addresses, so
- // don't do anything if the register isn't in the register class
- // used for pointers.
-
- const TargetLowering *TL = MF.getTarget().getTargetLowering();
-
- if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on 16-bit targets like PIC16.
- return InsertLoc;
-
- const TargetRegisterClass *ptrRegClass =
- TL->getRegClassFor(TL->getPointerTy());
- if (!ptrRegClass->contains(PhysReg))
- return InsertLoc;
-
- // Scan upwards through the preceding instructions. If an instruction doesn't
- // reference the stack slot or the register we're loading, we can
- // backschedule the reload up past it.
- MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
- while (NewInsertLoc != Begin) {
- MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
- for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
- MachineOperand &Op = Prev->getOperand(i);
- if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
- goto stop;
- }
- if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
- Prev->findRegisterDefOperand(PhysReg))
- goto stop;
- for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
- if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
- Prev->findRegisterDefOperand(*Alias))
- goto stop;
- NewInsertLoc = Prev;
- }
-stop:;
-
- // If we made it to the beginning of the block, turn around and move back
- // down just past any existing reloads. They're likely to be reloads/remats
- // for instructions earlier than what our current reload/remat is for, so
- // they should be scheduled earlier.
- if (NewInsertLoc == Begin) {
- int FrameIdx;
- while (InsertLoc != NewInsertLoc &&
- (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
- TII->isTriviallyReMaterializable(NewInsertLoc)))
- ++NewInsertLoc;
- }
-
- return NewInsertLoc;
-}
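
ComputeReloadLoc, quoted above from the code this patch deletes, backschedules a reload by scanning upward past instructions that reference neither the stack slot nor the register being loaded. The sketch below is an illustrative toy of that upward scan only, with a hypothetical ToyInstr that just records which registers an instruction touches.

#include <cstddef>
#include <set>
#include <vector>

struct ToyInstr { std::set<int> RegsTouched; };

// Returns the earliest index at which a reload of Reg could be inserted while
// still sitting at or above its original position Pos.
std::size_t computeReloadLoc(const std::vector<ToyInstr> &Block,
                             std::size_t Pos, int Reg) {
  std::size_t Loc = Pos;
  while (Loc > 0 && !Block[Loc - 1].RegsTouched.count(Reg))
    --Loc;                        // safe to hoist above an unrelated instruction
  return Loc;
}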
-
-namespace {
-
-// ReusedOp - For each reused operand, we keep track of a bit of information,
-// in case we need to rollback upon processing a new operand. See comments
-// below.
-struct ReusedOp {
- // The MachineInstr operand that reused an available value.
- unsigned Operand;
-
- // StackSlotOrReMat - The spill slot or remat id of the value being reused.
- unsigned StackSlotOrReMat;
-
- // PhysRegReused - The physical register the value was available in.
- unsigned PhysRegReused;
-
- // AssignedPhysReg - The physreg that was assigned for use by the reload.
- unsigned AssignedPhysReg;
-
- // VirtReg - The virtual register itself.
- unsigned VirtReg;
-
- ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
- unsigned vreg)
- : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
- AssignedPhysReg(apr), VirtReg(vreg) {}
-};
-
-/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
-/// is reused instead of reloaded.
-class ReuseInfo {
- MachineInstr &MI;
- std::vector<ReusedOp> Reuses;
- BitVector PhysRegsClobbered;
-public:
- ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
- PhysRegsClobbered.resize(tri->getNumRegs());
- }
-
- bool hasReuses() const {
- return !Reuses.empty();
- }
-
- /// addReuse - If we choose to reuse a virtual register that is already
- /// available instead of reloading it, remember that we did so.
- void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
- unsigned PhysRegReused, unsigned AssignedPhysReg,
- unsigned VirtReg) {
- // If the reload is to the assigned register anyway, no undo will be
- // required.
- if (PhysRegReused == AssignedPhysReg) return;
-
- // Otherwise, remember this.
- Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
- AssignedPhysReg, VirtReg));
- }
-
- void markClobbered(unsigned PhysReg) {
- PhysRegsClobbered.set(PhysReg);
- }
-
- bool isClobbered(unsigned PhysReg) const {
- return PhysRegsClobbered.test(PhysReg);
- }
-
- /// GetRegForReload - We are about to emit a reload into PhysReg. If there
- /// is some other operand that is using the specified register, either pick
- /// a new register to use, or evict the previous reload and use this reg.
- unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
- MachineFunction &MF, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM);
-
- /// GetRegForReload - Helper for the above GetRegForReload(). Add a
- /// 'Rejected' set to remember which registers have been considered and
- /// rejected for the reload. This avoids infinite looping in case like
- /// this:
- /// t1 := op t2, t3
- /// t2 <- assigned r0 for use by the reload but ended up reuse r1
- /// t3 <- assigned r1 for use by the reload but ended up reuse r0
- /// t1 <- desires r1
- /// sees r1 is taken by t2, tries t2's reload register r0
- /// sees r0 is taken by t3, tries t3's reload register r1
- /// sees r1 is taken by t2, tries t2's reload register r0 ...
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- SmallSet<unsigned, 8> Rejected;
- MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
- return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- }
-};
-
-}
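
The GetRegForReload comment above (in the deleted rewriter) describes how chasing "who currently holds this register" can cycle (r0 -> r1 -> r0 ...) and how a Rejected set breaks the cycle. The sketch below is a toy of that termination trick only, not the rewriter's actual reload logic; all names and the register model are hypothetical.

#include <set>
#include <unordered_map>

// HolderOf maps a register to the register its current holder would prefer
// instead. Returns a register not yet rejected, or 0 if every candidate in
// the chain has already been tried.
int pickReloadReg(int Wanted, const std::unordered_map<int, int> &HolderOf) {
  std::set<int> Rejected;
  int Cand = Wanted;
  while (Rejected.insert(Cand).second) {   // stop as soon as we revisit one
    auto It = HolderOf.find(Cand);
    if (It == HolderOf.end())
      return Cand;                          // nobody wants it: take it
    Cand = It->second;                      // try the holder's alternative
  }
  return 0;
}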
-
-// ****************** //
-// Utility Functions //
-// ****************** //
-
-/// findSinglePredSuccessor - Return via reference a vector of machine basic
-/// blocks each of which is a successor of the specified BB and has no other
-/// predecessor.
-static void findSinglePredSuccessor(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &Succs){
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->pred_size() == 1)
- Succs.push_back(SuccMBB);
- }
-}
-
-/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
-/// but not re-defined and it's being reused. Remove the kill flag for the
-/// register and unset the kill's marker and last kill operand.
-static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
-
- MachineOperand *KillOp = KillOps[Reg];
- KillOp->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOp->getReg();
- if (!RegKills[KReg])
- return;
-
- assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
- "invalid superreg kill flags");
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // If it's a def of a super-register. Its other sub-regsters are no
- // longer killed as well.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
-
- assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
- "invalid subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
-}
-
-/// ResurrectKill - Invalidate kill info associated with a previous MI. An
-/// optimization may have decided that it's safe to reuse a previously killed
-/// register. If we fail to erase the invalid kill flags, then the register
-/// scavenger may later clobber the register used by this MI. Note that this
-/// must be done even if this MI is being deleted! Consider:
-///
-/// USE $r1 (vreg1) <kill>
-/// ...
-/// $r1(vreg3) = COPY $r1 (vreg2)
-///
-/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
-/// vreg1's only use is a kill. The rewriter doesn't know it should be live
-/// until it rewrites vreg2. At that points it sees that the copy is dead and
-/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
-/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
-/// vreg1 before deleting the copy.
-static void ResurrectKill(MachineInstr &MI, unsigned Reg,
- const TargetRegisterInfo* TRI, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- return;
- }
- // No previous kill for this reg. Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
- ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
- }
-}
-
-/// InvalidateKills - MI is going to be deleted. If any of its operands are
-/// marked kill, then invalidate the information.
-static void InvalidateKills(MachineInstr &MI,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- SmallVector<unsigned, 2> *KillRegs = NULL) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (KillRegs)
- KillRegs->push_back(Reg);
- assert(Reg < KillOps.size());
- if (KillOps[Reg] == &MO) {
- // This operand was the kill, now no longer.
- KillOps[Reg] = NULL;
- RegKills.reset(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- if (RegKills[*SR]) {
- assert(KillOps[*SR] == &MO && "bad subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
- }
- else {
- // This operand may have reused a previously killed reg. Keep it live in
- // case it continues to be used after erasing this instruction.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
- }
- }
-}
-
-/// InvalidateRegDef - If the def operand of the specified def MI is now dead
- /// (since its spill instruction is removed), mark it isDead. Also checks
- /// whether the def MI has other definition operands that are not dead; that
- /// result is returned by reference in HasLiveDef.
-static bool InvalidateRegDef(MachineBasicBlock::iterator I,
- MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef,
- const TargetRegisterInfo *TRI) {
- // Due to remat, it's possible this reg isn't being reused. That is,
- // the def of this reg (by prev MI) is now dead.
- MachineInstr *DefMI = I;
- MachineOperand *DefOp = NULL;
- for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
- continue;
- if (MO.getReg() == Reg)
- DefOp = &MO;
- else if (!MO.isDead())
- HasLiveDef = true;
- }
- if (!DefOp)
- return false;
-
- bool FoundUse = false, Done = false;
- MachineBasicBlock::iterator E = &NewDef;
- ++I; ++E;
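- // Scan from the instruction after DefMI up to and including NewDef,
- // looking for a use of Reg or one of its sub-registers.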
- for (; !Done && I != E; ++I) {
- MachineInstr *NMI = I;
- for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() == 0 ||
- (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
- continue;
- if (MO.isUse())
- FoundUse = true;
- Done = true; // Stop after scanning all the operands of this MI.
- }
- }
- if (!FoundUse) {
- // Def is dead!
- DefOp->setIsDead();
- return true;
- }
- return false;
-}
-
-/// UpdateKills - Track and update kill info. If a MI reads a register that is
-/// marked kill, then it must be due to register reuse. Transfer the kill info
-/// over.
-static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- // These do not affect kill info at all.
- if (MI.isDebugValue())
- return;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- // This operand may have reused a previously killed reg. Keep it live.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
-
- if (MO.isKill()) {
- RegKills.set(Reg);
- KillOps[Reg] = &MO;
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.set(*SR);
- KillOps[*SR] = &MO;
- }
- }
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- RegKills.reset(Reg);
- KillOps[Reg] = NULL;
- // It also defines (or partially defines) aliases.
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- }
-}
-
-/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
-///
-static void ReMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- unsigned DestReg, unsigned Reg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
-#ifndef NDEBUG
- const MCInstrDesc &MCID = ReMatDefMI->getDesc();
- assert(MCID.getNumDefs() == 1 &&
- "Don't know how to remat instructions that define > 1 values!");
-#endif
- TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
- MachineInstr *NewMI = prior(MII);
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
- continue;
- assert(MO.isUse());
- unsigned Phys = VRM.getPhys(VirtReg);
- assert(Phys && "Virtual register is not assigned a register?");
- substitutePhysReg(MO, Phys, *TRI);
- }
- ++NumReMats;
-}
-
- /// findSuperReg - Find SubReg's super-register in the given register class
- /// whose SubIdx sub-register is SubReg.
-static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
- unsigned SubIdx, const TargetRegisterInfo *TRI) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- unsigned Reg = *I;
- if (TRI->getSubReg(Reg, SubIdx) == SubReg)
- return Reg;
- }
- return 0;
-}
-
-// ******************************** //
-// Available Spills Implementation //
-// ******************************** //
-
-/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
-/// stackslot register. The register is still available but is no longer
- /// allowed to be modified.
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " copied, it is available for use but can no longer be modified\n");
- }
-}
-
-/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
-/// stackslot register and its aliases. The register and its aliases may
- /// still be available but are no longer allowed to be modified.
-void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- disallowClobberPhysRegOnly(*AS);
- disallowClobberPhysRegOnly(PhysReg);
-}
-
-/// ClobberPhysRegOnly - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in it.
-void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
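- // Post-increment I before the erase so the iterator stays valid.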
- PhysRegsAvailable.erase(I++);
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " clobbered, invalidating ");
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
- else
- DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
- }
-}
-
-/// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in
-/// it and any of its aliases.
-void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- ClobberPhysRegOnly(*AS);
- ClobberPhysRegOnly(PhysReg);
-}
-
-/// AddAvailableRegsToLiveIn - Availability information is being kept coming
-/// into the specified MBB. Add available physical registers as potential
- /// live-ins. If they are reused in the MBB, they will be added to the
- /// live-in set to keep the register scavenger and the post-allocation
- /// scheduler happy.
-void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- std::set<unsigned> NotAvailable;
- for (std::multimap<unsigned, int>::iterator
- I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
- I != E; ++I) {
- unsigned Reg = I->first;
- const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
- // FIXME: A temporary workaround. We can't reuse an available value if it's
- // not safe to move the def of the virtual register's class. e.g.
- // X86::RFP* register classes. Do not add it as a live-in.
- if (!TII->isSafeToMoveRegClassDefs(RC))
- // This is no longer available.
- NotAvailable.insert(Reg);
- else {
- MBB.addLiveIn(Reg);
- if (RegKills[Reg])
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- }
-
- // Skip over the same register.
- std::multimap<unsigned, int>::iterator NI = llvm::next(I);
- while (NI != E && NI->first == Reg) {
- ++I;
- ++NI;
- }
- }
-
- for (std::set<unsigned>::iterator I = NotAvailable.begin(),
- E = NotAvailable.end(); I != E; ++I) {
- ClobberPhysReg(*I);
- for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
- *SubRegs; ++SubRegs)
- ClobberPhysReg(*SubRegs);
- }
-}
-
-/// ModifyStackSlotOrReMat - This method is called when the value in a stack
-/// slot changes. This removes information about which register the previous
-/// value for this slot lives in (as the previous value is dead now).
-void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
- SpillSlotsOrReMatsAvailable.erase(It);
-
- // This register may hold the value of multiple stack slots; only remove this
- // stack slot from the set of values the register contains.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- for (; ; ++I) {
- assert(I != PhysRegsAvailable.end() && I->first == Reg &&
- "Map inverse broken!");
- if (I->second == SlotOrReMat) break;
- }
- PhysRegsAvailable.erase(I);
-}
-
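-/// ClobberSharingStackSlots - The value in StackSlot is changing. Any other
-/// stack slots whose values were available in the same physical register are
-/// no longer available from it, so forget about them.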
-void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(StackSlot);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
-
- // Erase entries in PhysRegsAvailable for other stack slots.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- while (I != PhysRegsAvailable.end() && I->first == Reg) {
- std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
- if (I->second != StackSlot) {
- DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
- << PrintReg(Reg, TRI) << '\n');
- SpillSlotsOrReMatsAvailable.erase(I->second);
- PhysRegsAvailable.erase(I);
- }
- I = NextI;
- }
-}
-
-// ************************** //
-// Reuse Info Implementation //
-// ************************** //
-
-/// GetRegForReload - We are about to emit a reload into PhysReg. If there
-/// is some other operand that is using the specified register, either pick
-/// a new register to use, or evict the previous reload and use this reg.
-unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
- unsigned PhysReg,
- MachineFunction &MF,
- MachineInstr *MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
- const TargetRegisterInfo *TRI = Spills.getRegInfo();
-
- if (Reuses.empty()) return PhysReg; // This is most often empty.
-
- for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
- ReusedOp &Op = Reuses[ro];
- // If we find some other reuse that was supposed to use this register
- // exactly for its reload, we can change this reload to use ITS reload
- // register. That is, unless its reload register has already been
- // considered and subsequently rejected because it has also been reused
- // by another operand.
- if (Op.PhysRegReused == PhysReg &&
- Rejected.count(Op.AssignedPhysReg) == 0 &&
- RC->contains(Op.AssignedPhysReg)) {
- // Yup, use the reload register that we didn't use before.
- unsigned NewReg = Op.AssignedPhysReg;
- Rejected.insert(PhysReg);
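- // Recurse in case NewReg has itself been reused and also conflicts.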
- return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- } else {
- // Otherwise, we might also have a problem if a previously reused
- // value aliases the new register. If so, codegen the previous reload
- // and use this one.
- unsigned PRRU = Op.PhysRegReused;
- if (TRI->regsOverlap(PRRU, PhysReg)) {
- // Okay, we found out that an alias of a reused register
- // was used. This isn't good because it means we have
- // to undo a previous reuse.
- MachineBasicBlock *MBB = MI->getParent();
- const TargetRegisterClass *AliasRC =
- MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
-
- // Copy Op out of the vector and remove it, we're going to insert an
- // explicit load for it.
- ReusedOp NewOp = Op;
- Reuses.erase(Reuses.begin()+ro);
-
- // MI may be using only a sub-register of PhysRegUsed.
- unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
- unsigned SubIdx = 0;
- assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
- "A reuse cannot be a virtual register");
- if (PRRU != RealPhysRegUsed) {
- // What was the sub-register index?
- SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
- assert(SubIdx &&
- "Operand physreg is not a sub-register of PhysRegUsed");
- }
-
- // Ok, we're going to try to reload the assigned physreg into the
- // slot that we were supposed to in the first place. However, that
- // register could hold a reuse. Check to see if it conflicts or
- // would prefer us to use a different register.
- unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
- MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
-
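- // StackSlotOrReMat values above MAX_STACK_SLOT indicate a re-materialized
- // value rather than a real stack slot.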
- bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
- int SSorRMId = DoReMat
- ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
- DoReMat, SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
- TRI, VRM);
- } else {
- TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
- NewOp.StackSlotOrReMat, AliasRC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
- // Any stores to this stack slot are not dead anymore.
- MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
- ++NumLoads;
- }
- Spills.ClobberPhysReg(NewPhysReg);
- Spills.ClobberPhysReg(NewOp.PhysRegReused);
-
- unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
- MI->getOperand(NewOp.Operand).setReg(RReg);
- MI->getOperand(NewOp.Operand).setSubReg(0);
-
- Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-
- DEBUG(dbgs() << "Reuse undone!\n");
- --NumReused;
-
- // Finally, PhysReg is now available, go ahead and use it.
- return PhysReg;
- }
- }
- }
- return PhysReg;
-}
-
-// ************************************************************************ //
-
-/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
-/// stack slot mod/ref. It also checks if it's possible to unfold the
-/// instruction by having it define a specified physical register instead.
-static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
- return false;
-
- bool Found = false;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
- unsigned VirtReg = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- if (MR & VirtRegMap::isModRef)
- if (VRM.getStackSlot(VirtReg) == SS) {
- Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
- break;
- }
- }
- if (!Found)
- return false;
-
- // Does the instruction use a register that overlaps the scratch register?
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (!VRM.hasPhys(Reg))
- continue;
- Reg = VRM.getPhys(Reg);
- }
- if (TRI->regsOverlap(PhysReg, Reg))
- return false;
- }
- return true;
-}
-
-/// FindFreeRegister - Find a free register of a given register class by looking
-/// at (at most) the last two machine instructions.
-static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
- MachineBasicBlock &MBB,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- BitVector &AllocatableRegs) {
- BitVector Defs(TRI->getNumRegs());
- BitVector Uses(TRI->getNumRegs());
- SmallVector<unsigned, 4> LocalUses;
- SmallVector<unsigned, 4> Kills;
-
- // Take a look at 2 instructions at most.
- unsigned Count = 0;
- while (Count < 2) {
- if (MII == MBB.begin())
- break;
- MachineInstr *PrevMI = prior(MII);
- MII = PrevMI;
-
- if (PrevMI->isDebugValue())
- continue; // Skip over dbg_value instructions.
- ++Count;
-
- for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = PrevMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (MO.isDef()) {
- Defs.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Defs.set(*AS);
- } else {
- LocalUses.push_back(Reg);
- if (MO.isKill() && AllocatableRegs[Reg])
- Kills.push_back(Reg);
- }
- }
-
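- // A killed register that is neither redefined nor read again before
- // MII is free to reuse.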
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned Kill = Kills[i];
- if (!Defs[Kill] && !Uses[Kill] &&
- RC->contains(Kill))
- return Kill;
- }
- for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
- unsigned Reg = LocalUses[i];
- Uses.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.set(*AS);
- }
- }
-
- return 0;
-}
-
-static
-void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterInfo &TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == VirtReg)
- substitutePhysReg(MO, PhysReg, TRI);
- }
-}
-
-namespace {
-
-struct RefSorter {
- bool operator()(const std::pair<MachineInstr*, int> &A,
- const std::pair<MachineInstr*, int> &B) {
- return A.second < B.second;
- }
-};
-
-// ***************************** //
-// Local Spiller Implementation //
-// ***************************** //
-
-class LocalRewriter : public VirtRegRewriter {
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- VirtRegMap *VRM;
- LiveIntervals *LIs;
- BitVector AllocatableRegs;
- DenseMap<MachineInstr*, unsigned> DistanceMap;
- DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
-
- MachineBasicBlock *MBB; // Basic block currently being processed.
-
-public:
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs);
-
-private:
- void EraseInstr(MachineInstr *MI) {
- VRM->RemoveMachineInstrFromMaps(MI);
- LIs->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- }
-
- bool OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI);
-
- void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- void TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertEmergencySpills(MachineInstr *MI);
-
- bool InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertSpills(MachineInstr *MI);
-
- void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps);
-
- void RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-}
-
-bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* lis) {
- MRI = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
- VRM = &vrm;
- LIs = lis;
- AllocatableRegs = TRI->getAllocatableSet(MF);
- DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
- << MF.getFunction()->getName() << "':\n");
- DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
- " reloads!) ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Spills - Keep track of which spilled values are available in physregs
- // so that we can choose to reuse the physregs instead of emitting
- // reloads. This is usually refreshed per basic block.
- AvailableSpills Spills(TRI, TII);
-
- // Keep track of kill information.
- BitVector RegKills(TRI->getNumRegs());
- std::vector<MachineOperand*> KillOps;
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- // SinglePredSuccs - Successor blocks which have a single predecessor.
- SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
- SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
-
- // Traverse the basic blocks depth first.
- MachineBasicBlock *Entry = MF.begin();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MBB = *DFI;
- if (!EarlyVisited.count(MBB))
- RewriteMBB(LIs, Spills, RegKills, KillOps);
-
- // If this MBB is the only predecessor of a successor, keep the
- // availability information and visit that successor next.
- do {
- // Keep visiting single predecessor successor as long as possible.
- SinglePredSuccs.clear();
- findSinglePredSuccessor(MBB, SinglePredSuccs);
- if (SinglePredSuccs.empty())
- MBB = 0;
- else {
- // FIXME: There may be more than one successor, each of which has MBB
- // as its only predecessor.
- MBB = SinglePredSuccs[0];
- if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
- Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
- RewriteMBB(LIs, Spills, RegKills, KillOps);
- }
- }
- } while (MBB);
-
- // Clear the availability info.
- Spills.clear();
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Mark unused spill slots.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int SS = VRM->getLowSpillSlot();
- if (SS != VirtRegMap::NO_STACK_SLOT) {
- for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
- SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
- if (!VRM->isSpillSlotUsed(SS)) {
- MFI->RemoveStackObject(SS);
- for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
- MachineInstr *DVMI = DbgValues[j];
- DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- EraseInstr(DVMI);
- }
- ++NumDSS;
- }
- DbgValues.clear();
- }
- }
- Slot2DbgValues.clear();
-
- return true;
-}
-
-/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
-/// a scratch register is available.
-/// xorq %r12<kill>, %r13
-/// addq %rax, -184(%rbp)
-/// addq %r13, -184(%rbp)
-/// ==>
-/// xorq %r12<kill>, %r13
-/// movq -184(%rbp), %r12
-/// addq %rax, %r12
-/// addq %r13, %r12
-/// movq %r12, -184(%rbp)
-bool LocalRewriter::
-OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- return false;
-
- if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
- return false;
-
- // Now let's see if the last couple of instructions happen to have freed up
- // a register.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
- if (!PhysReg)
- return false;
-
- MachineFunction &MF = *MBB->getParent();
- TRI = MF.getTarget().getRegisterInfo();
- MachineInstr &MI = *MII;
- if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // If the next instruction also folds the same SS modref and can be unfolded,
- // then it's worthwhile to issue a load from SS into the free register and
- // then unfold these instructions.
- if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // Back-schedule reloads and remats.
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
-
- // Load from SS to the spare physical register.
- TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
- // This invalidates Phys.
- Spills.ClobberPhysReg(PhysReg);
- // Remember it's available.
- Spills.addAvailable(SS, PhysReg);
- MaybeDeadStores[SS] = NULL;
-
- // Unfold current MI.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&MI, NewMIs[0]);
- MII = MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- ++NumModRefUnfold;
-
- // Unfold next instructions that fold the same SS.
- do {
- MachineInstr &NextMI = *NextMII;
- NextMII = llvm::next(NextMII);
- NewMIs.clear();
- if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&NextMI, NewMIs[0]);
- MBB->insert(NextMII, NewMIs[0]);
- InvalidateKills(NextMI, TRI, RegKills, KillOps);
- EraseInstr(&NextMI);
- ++NumModRefUnfold;
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- break;
- } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
-
- // Store the value back into SS.
- TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(NextMII);
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-
- return true;
-}
-
-/// OptimizeByUnfold - Turn a store folding instruction into a load folding
-/// instruction. e.g.
-/// xorl %edi, %eax
-/// movl %eax, -32(%ebp)
-/// movl -36(%ebp), %eax
-/// orl %eax, -32(%ebp)
-/// ==>
-/// xorl %edi, %eax
-/// orl -36(%ebp), %eax
-/// mov %eax, -32(%ebp)
-/// This enables unfolding optimization for a subsequent instruction which will
-/// also eliminate the newly introduced store instruction.
-bool LocalRewriter::
-OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- MachineFunction &MF = *MBB->getParent();
- MachineInstr &MI = *MII;
- unsigned UnfoldedOpc = 0;
- unsigned UnfoldPR = 0;
- unsigned UnfoldVR = 0;
- int FoldedSS = VirtRegMap::NO_STACK_SLOT;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
- // Only transform a MI that folds a single register.
- if (UnfoldedOpc)
- return false;
- UnfoldVR = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- // MI2VirtMap can be updated, which would invalidate the iterator.
- // Increment the iterator first.
- ++I;
- if (VRM->isAssignedReg(UnfoldVR))
- continue;
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- FoldedSS = VRM->getStackSlot(UnfoldVR);
- MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
- if (DeadStore && (MR & VirtRegMap::isModRef)) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
- if (!PhysReg || !DeadStore->readsRegister(PhysReg))
- continue;
- UnfoldPR = PhysReg;
- UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
- false, true);
- }
- }
-
- if (!UnfoldedOpc) {
- if (!UnfoldVR)
- return false;
-
- // Look for other unfolding opportunities.
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
- RegKills, KillOps);
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
- continue;
- if (VRM->isAssignedReg(VirtReg)) {
- unsigned PhysReg = VRM->getPhys(VirtReg);
- if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- } else if (VRM->isReMaterialized(VirtReg))
- continue;
- int SS = VRM->getStackSlot(VirtReg);
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- if (PhysReg) {
- if (TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- continue;
- }
- if (VRM->hasPhys(VirtReg)) {
- PhysReg = VRM->getPhys(VirtReg);
- if (!TRI->regsOverlap(PhysReg, UnfoldPR))
- continue;
- }
-
- // Ok, we'll need to reload the value into a register which makes
- // it impossible to perform the store unfolding optimization later.
- // Let's see if it is possible to fold the load if the store is
- // unfolded. This allows us to perform the store unfolding
- // optimization.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
- assert(NewMIs.size() == 1);
- MachineInstr *NewMI = NewMIs.back();
- MBB->insert(MII, NewMI);
- NewMIs.clear();
- int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
- assert(Idx != -1);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(Idx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
- NewMI->eraseFromParent();
- if (FoldedMI) {
- VRM->addSpillSlotUse(SS, FoldedMI);
- if (!VRM->hasPhys(UnfoldVR))
- VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- MII = FoldedMI;
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- return true;
- }
- }
- }
-
- return false;
-}
-
- /// CommuteChangesDestination - We are looking for r0 = op r1, r2,
- /// where SrcReg is r1 and is tied to r0. Return true if, after
- /// commuting, this instruction will be r0 = op r2, r1.
-static bool CommuteChangesDestination(MachineInstr *DefMI,
- const MCInstrDesc &MCID,
- unsigned SrcReg,
- const TargetInstrInfo *TII,
- unsigned &DstIdx) {
- if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
- return false;
- if (!DefMI->getOperand(1).isReg() ||
- DefMI->getOperand(1).getReg() != SrcReg)
- return false;
- unsigned DefIdx;
- if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
- return false;
- unsigned SrcIdx1, SrcIdx2;
- if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
- return false;
- if (SrcIdx1 == 1 && SrcIdx2 == 2) {
- DstIdx = 2;
- return true;
- }
- return false;
-}
-
-/// CommuteToFoldReload -
-/// Look for
-/// r1 = load fi#1
-/// r1 = op r1, r2<kill>
-/// store r1, fi#1
-///
-/// If op is commutable and r2 is killed, then we can xform these to
-/// r2 = op r2, fi#1
-/// store r2, fi#1
-bool LocalRewriter::
-CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI) {
- if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
- return false;
-
- MachineInstr &MI = *MII;
- MachineBasicBlock::iterator DefMII = prior(MII);
- MachineInstr *DefMI = DefMII;
- const MCInstrDesc &MCID = DefMI->getDesc();
- unsigned NewDstIdx;
- if (DefMII != MBB->begin() &&
- MCID.isCommutable() &&
- CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
- MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
- if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
- return false;
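- // The instruction just before the def must be a reload of SrcReg from
- // the same stack slot, otherwise we cannot fold.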
- MachineInstr *ReloadMI = prior(DefMII);
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
- if (DestReg != SrcReg || FrameIdx != SS)
- return false;
- int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
- if (UseIdx == -1)
- return false;
- unsigned DefIdx;
- if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
- return false;
- assert(DefMI->getOperand(DefIdx).isReg() &&
- DefMI->getOperand(DefIdx).getReg() == SrcReg);
-
- // Now commute def instruction.
- MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
- if (!CommutedMI)
- return false;
- MBB->insert(MII, CommutedMI);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(NewDstIdx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
- // Not needed since foldMemoryOperand returns new MI.
- CommutedMI->eraseFromParent();
- if (!FoldedMI)
- return false;
-
- VRM->addSpillSlotUse(SS, FoldedMI);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- // Insert new def MI and spill MI.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
- MII = prior(MII);
- MachineInstr *StoreMI = MII;
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- MII = FoldedMI; // Update MII to backtrack.
-
- // Delete all 3 old instructions.
- InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- EraseInstr(ReloadMI);
- InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- EraseInstr(DefMI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
-
- // If NewReg was previously holding the value of some SS, it's now clobbered.
- // This has to be done now because it's a physical register. When this
- // instruction is re-visited, it's ignored.
- Spills.ClobberPhysReg(NewReg);
-
- ++NumCommutes;
- return true;
- }
-
- return false;
-}
-
-/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
-/// the last store to the same slot is now dead. If so, remove the last store.
-void LocalRewriter::
-SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
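- // Remember the instruction currently following MII so we can find the
- // store we are about to insert in front of it.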
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
- TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
-
- // If there is a dead store to this stack slot, nuke it now.
- if (LastStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
- ++NumDSE;
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
- MachineBasicBlock::iterator PrevMII = LastStore;
- bool CheckDef = PrevMII != MBB->begin();
- if (CheckDef)
- --PrevMII;
- EraseInstr(LastStore);
- if (CheckDef) {
- // Look at defs of killed registers on the store. Mark the defs
- // as dead since the store has been deleted and they aren't
- // being reused.
- for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
- bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
- MachineInstr *DeadDef = PrevMII;
- if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
- // FIXME: This assumes a remat def does not have side effects.
- EraseInstr(DeadDef);
- ++NumDRM;
- }
- }
- }
- }
- }
-
- // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
- // the last of multiple instructions is the actual store.
- LastStore = prior(oldNextMII);
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the register available,
- // in PhysReg.
- Spills.ModifyStackSlotOrReMat(StackSlot);
- Spills.ClobberPhysReg(PhysReg);
- Spills.addAvailable(StackSlot, PhysReg, isAvailable);
- ++NumStores;
-}
-
-/// isSafeToDelete - Return true if this instruction doesn't produce any side
-/// effect and all of its defs are dead.
-static bool isSafeToDelete(MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
- MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
- MI.isLabel() || MI.isDebugValue() ||
- MI.hasUnmodeledSideEffects())
- return false;
-
- // Technically speaking inline asm without side effects and no defs can still
- // be deleted. But there is so much bad inline asm code out there, we should
- // let them be.
- if (MI.isInlineAsm())
- return false;
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- // FIXME: We can't remove kill markers or else the scavenger will assert.
- // An alternative is to add a ADD pseudo instruction to replace kill
- // markers.
- return false;
- }
- return true;
-}
-
- /// TransferDeadness - An identity copy definition is dead and it's being
-/// removed. Find the last def or use and mark it as dead / kill.
-void LocalRewriter::
-TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- SmallPtrSet<MachineInstr*, 4> Seens;
- SmallVector<std::pair<MachineInstr*, int>,8> Refs;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *UDMI = &*RI;
- if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
- if (DI == DistanceMap.end())
- continue;
- if (Seens.insert(UDMI))
- Refs.push_back(std::make_pair(UDMI, DI->second));
- }
-
- if (Refs.empty())
- return;
- std::sort(Refs.begin(), Refs.end(), RefSorter());
-
- while (!Refs.empty()) {
- MachineInstr *LastUDMI = Refs.back().first;
- Refs.pop_back();
-
- MachineOperand *LastUD = NULL;
- for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = LastUDMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() != Reg)
- continue;
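- // Prefer a def operand over a use when both refer to Reg.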
- if (!LastUD || (LastUD->isUse() && MO.isDef()))
- LastUD = &MO;
- if (LastUDMI->isRegTiedToDefOperand(i))
- break;
- }
- if (LastUD->isDef()) {
- // If the instruction has no side effects, delete it and propagate
- // backward further. Otherwise, mark it dead and we are done.
- if (!isSafeToDelete(*LastUDMI)) {
- LastUD->setIsDead();
- break;
- }
- EraseInstr(LastUDMI);
- } else {
- LastUD->setIsKill();
- RegKills.set(Reg);
- KillOps[Reg] = LastUD;
- break;
- }
- }
-}
-
-/// InsertEmergencySpills - Insert emergency spills before MI if requested by
-/// VRM. Return true if spills were inserted.
-bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
- if (!VRM->hasEmergencySpills(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- SmallSet<int, 4> UsedSS;
- std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
- for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
- unsigned PhysReg = EmSpills[i];
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
- assert(RC && "Unable to determine register class!");
- int SS = VRM->getEmergencySpillSlot(RC);
- if (UsedSS.count(SS))
- llvm_unreachable("Need to spill more than one physical registers!");
- UsedSS.insert(SS);
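- // Save PhysReg to its emergency spill slot just before MI.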
- TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(MII);
- VRM->addSpillSlotUse(SS, StoreMI);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
- TII, *MBB->getParent());
-
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
-
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SS, LoadMI);
- ++NumPSpills;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- return true;
-}
-
- /// InsertRestores - Restore registers before MI if requested by VRM. Return
- /// true if any instructions were inserted.
-bool LocalRewriter::InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (!VRM->isRestorePt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
- for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
- unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
-
- // Check if the value being restored is available. If so, it must be
- // from a predecessor BB that falls through into this BB. We do not
- // expect:
- // BB1:
- // r1 = load fi#1
- // ...
- // = r1<kill>
- // ... # r1 not clobbered
- // ...
- // = load fi#1
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
- if (InReg == Phys) {
- // If the value is already available in the expected register, save
- // a reload / remat.
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" instead of reloading into physreg "
- << TRI->getName(Phys) << '\n');
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to update
- // the kill flags for the current instruction after processing it.
-
- ++NumOmitted;
- continue;
- } else if (InReg && InReg != Phys) {
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" by copying it into physreg "
- << TRI->getName(Phys) << '\n');
-
- // If the reloaded / remat value is available in another register,
- // copy it to the desired register.
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), Phys)
- .addReg(InReg, RegState::Kill);
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- DEBUG(dbgs() << '\t' << *CopyMI);
- ++NumCopified;
- continue;
- }
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
-
- if (VRM->isReMaterialized(VirtReg)) {
- ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(MII));
- }
- return true;
-}
-
-/// InsertSpills - Insert spills after MI if requested by VRM. Return
-/// true if spills were inserted.
-bool LocalRewriter::InsertSpills(MachineInstr *MI) {
- if (!VRM->isSpillPt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<std::pair<unsigned,bool> > &SpillRegs =
- VRM->getSpillPtSpills(MI);
- for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
- unsigned VirtReg = SpillRegs[i].first;
- bool isKill = SpillRegs[i].second;
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- unsigned Phys = VRM->getPhys(VirtReg);
- int StackSlot = VRM->getStackSlot(VirtReg);
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
- RC, TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- }
- return true;
-}
-
-
-/// ProcessUses - Process all of MI's spilled operands and all available
-/// operands.
-void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps) {
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
-
- unsigned VirtReg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore physregs for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit an implicit_def
- // before so the scavenger knows it's "defined".
- // FIXME: This is a horrible hack done by the register allocator to
- // remat a definition with a virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
-
- // A partial def causes problems because the same operand both reads and
- // writes the register. This rewriter is designed to rewrite uses and defs
- // separately, so a partial def would already have been rewritten to a
- // physreg by the time we get to processing defs.
- // Add an implicit use operand to model the partial def.
- if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
- MI.findRegisterUseOperandIdx(VirtReg) == -1) {
- VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
- MI.addOperand(MachineOperand::CreateReg(VirtReg,
- false, // isDef
- true)); // isImplicit
- DEBUG(dbgs() << "Partial redef: " << MI);
- }
- }
-
- // Process all of the spilled uses and all non spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) is not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflict with the live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
- // If this is an asm, and a PhysReg alias is used elsewhere as an
- // earlyclobber operand, we can't also use it as an input.
- if (MI.isInlineAsm()) {
- for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
- MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.isEarlyClobber() &&
- TRI->regsOverlap(MOk.getReg(), PhysReg)) {
- CanReuse = false;
- DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg) << ": " << MOk
- << '\n');
- break;
- }
- }
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
- << " instead of reloading into "
- << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
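- // Rewrite the operand to the reused physreg, or to its SubIdx
- // sub-register if the operand used one.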
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to
- // update the kill flags for the current instr after processing it.
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark it isKill if there are no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to insert a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg)
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
-}
-
-/// RewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avoid reloading vregs.
-void
-LocalRewriter::RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
- << MBB->getName() << "':\n");
-
- MachineFunction &MF = *MBB->getParent();
-
- // MaybeDeadStores - When we need to write a value back into a stack slot,
- // keep track of the inserted store. If the stack slot value is never read
- // (because the value was used from some available register, for example), and
- // subsequently stored to, the original store is dead. This map keeps track
- // of inserted stores that are not used. If we see a subsequent store to the
- // same stack slot, the original store is deleted.
- std::vector<MachineInstr*> MaybeDeadStores;
- MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
-
- // ReMatDefs - These are rematerializable def MIs which are not deleted.
- SmallSet<MachineInstr*, 4> ReMatDefs;
-
- // Keep track of the registers we have already spilled in case there are
- // multiple defs of the same register in MI.
- SmallSet<unsigned, 8> SpilledMIRegs;
-
- RegKills.reset();
- KillOps.clear();
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- DistanceMap.clear();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ) {
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
-
- if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
- NextMII = llvm::next(MII);
-
- if (InsertEmergencySpills(MII))
- NextMII = llvm::next(MII);
-
- InsertRestores(MII, Spills, RegKills, KillOps);
-
- if (InsertSpills(MII))
- NextMII = llvm::next(MII);
-
- bool Erased = false;
- bool BackTracked = false;
- MachineInstr &MI = *MII;
-
- // Remember DbgValue's which reference stack slots.
- if (MI.isDebugValue() && MI.getOperand(0).isFI())
- Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
-
- /// ReusedOperands - Keep track of operand reuse in case we need to undo
- /// reuse.
- ReuseInfo ReusedOperands(MI, TRI);
-
- ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
-
- DEBUG(dbgs() << '\t' << MI);
-
-
- // If we have folded references to memory operands, make sure we clear all
- // physical registers that may contain the value of the spilled virtual
-    // register.
-
- // Copy the folded virts to a small vector, we may change MI2VirtMap.
- SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
- // C++0x FTW!
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
- VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
- VRM->getFoldedVirts(&MI);
- FVRange.first != FVRange.second; ++FVRange.first)
- FoldedVirts.push_back(FVRange.first->second);
-
- SmallSet<int, 2> FoldedSS;
- for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
- unsigned VirtReg = FoldedVirts[FVI].first;
- VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
- DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
-
- int SS = VRM->getStackSlot(VirtReg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- continue;
- FoldedSS.insert(SS);
- DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
-
- // If this folded instruction is just a use, check to see if it's a
- // straight load from the virt reg slot.
- if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
- if (DestReg && FrameIdx == SS) {
- // If this spill slot is available, turn it into a copy (or nothing)
- // instead of leaving it as a load!
- if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
- DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
- if (DestReg != InReg) {
- MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
- MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY))
- .addReg(DestReg, RegState::Define, DefMO->getSubReg())
- .addReg(InReg, RegState::Kill);
- // Revisit the copy so we make sure to notice the effects of the
- // operation on the destreg (either needing to RA it if it's
- // virtual or needing to clobber any values if it's physical).
- NextMII = CopyMI;
- NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- BackTracked = true;
- } else {
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // InvalidateKills resurrects any prior kill of the copy's source
- // allowing the source reg to be reused in place of the copy.
- Spills.disallowClobberPhysReg(InReg);
- }
-
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- goto ProcessNextInst;
- }
- } else {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- if (PhysReg &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- goto ProcessNextInst;
- }
- }
- }
-
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- MachineInstr* DeadStore = MaybeDeadStores[SS];
- if (DeadStore) {
- bool isDead = !(MR & VirtRegMap::isRef);
- MachineInstr *NewStore = NULL;
- if (MR & VirtRegMap::isModRef) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- // We can reuse this physreg as long as we are allowed to clobber
- // the value and there isn't an earlier def that has already clobbered
- // the physreg.
- if (PhysReg &&
- !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg) &&
- !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
- MachineOperand *KillOpnd =
- DeadStore->findRegisterUseOperand(PhysReg, true);
- // Note, if the store is storing a sub-register, it's possible the
- // super-register is needed below.
- if (KillOpnd && !KillOpnd->getSubReg() &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- NewStore = NewMIs[1];
- MBB->insert(MII, NewStore);
- VRM->addSpillSlotUse(SS, NewStore);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- isDead = true;
- ++NumSUnfold;
- }
- }
- }
-
- if (isDead) { // Previous store is dead.
- // If we get here, the store is dead, nuke it now.
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- if (!NewStore)
- ++NumDSE;
- }
-
- MaybeDeadStores[SS] = NULL;
- if (NewStore) {
- // Treat this store as a spill merged into a copy. That makes the
- // stack slot value available.
- VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
- goto ProcessNextInst;
- }
- }
-
- // If the spill slot value is available, and this is a new definition of
- // the value, the value is not available anymore.
- if (MR & VirtRegMap::isMod) {
- // Notice that the value in this stack slot has been modified.
- Spills.ModifyStackSlotOrReMat(SS);
-
- // If this is *just* a mod of the value, check to see if this is just a
- // store to the spill slot (i.e. the spill got merged into the copy). If
- // so, realize that the vreg is available now, and add the store to the
- // MaybeDeadStore info.
- int StackSlot;
- if (!(MR & VirtRegMap::isRef)) {
- if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
- assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- "Src hasn't been allocated yet?");
-
- if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
- Spills, RegKills, KillOps, TRI)) {
- NextMII = llvm::next(MII);
- BackTracked = true;
- goto ProcessNextInst;
- }
-
- // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
- // this as a potentially dead store in case there is a subsequent
- // store into the stack slot without a read from it.
- MaybeDeadStores[StackSlot] = &MI;
-
- // If the stack slot value was previously available in some other
-          // register, change it now. Otherwise, make the value
-          // available in SrcReg.
- Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
- }
- }
- }
- }
-
- // Process all of the spilled defs.
- SpilledMIRegs.clear();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!(MO.isReg() && MO.getReg() && MO.isDef()))
- continue;
-
- unsigned VirtReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- // Also check if it's copying from an "undef", if so, we can't
- // eliminate this or else the undef marker is lost and it will
-        // confuse the scavenger. This is extremely rare.
- if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
- MI.getNumOperands() == 2) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
- if (MO.isDead() && !KillRegs.empty()) {
- // Source register or an implicit super/sub-register use is killed.
- assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
- // Last def is now dead.
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
- }
- EraseInstr(&MI);
- Erased = true;
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
-
- // If it's not a no-op copy, it clobbers the value in the destreg.
- Spills.ClobberPhysReg(VirtReg);
- ReusedOperands.markClobbered(VirtReg);
-
- // Check to see if this instruction is a load from a stack slot into
- // a register. If so, this provides the stack slot value in the reg.
- int FrameIdx;
- if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
- assert(DestReg == VirtReg && "Unknown load situation!");
-
- // If it is a folded reference, then it's not safe to clobber.
- bool Folded = FoldedSS.count(FrameIdx);
- // Otherwise, if it wasn't available, remember that it is now!
- Spills.addAvailable(FrameIdx, DestReg, !Folded);
- goto ProcessNextInst;
- }
-
- continue;
- }
-
- unsigned SubIdx = MO.getSubReg();
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- if (DoReMat)
- ReMatDefs.insert(&MI);
-
- // The only vregs left are stack slot definitions.
- int StackSlot = VRM->getStackSlot(VirtReg);
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-
- // If this def is part of a two-address operand, make sure to execute
- // the store from the correct physical register.
- unsigned PhysReg;
- unsigned TiedOp;
- if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
- PhysReg = MI.getOperand(TiedOp).getReg();
- if (SubIdx) {
- unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
- assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
- "Can't find corresponding super-register!");
- PhysReg = SuperReg;
- }
- } else {
- PhysReg = VRM->getPhys(VirtReg);
- if (ReusedOperands.isClobbered(PhysReg)) {
- // Another def has taken the assigned physreg. It must have been a
- // use&def which got it due to reuse. Undo the reuse!
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
- }
- }
-
- // If StackSlot is available in a register that also holds other stack
- // slots, clobber those stack slots now.
- Spills.ClobberSharingStackSlots(StackSlot);
-
- assert(PhysReg && "VR not assigned a physical register?");
- MRI->setPhysRegUsed(PhysReg);
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- ReusedOperands.markClobbered(RReg);
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
- MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
- SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
- LastStore, Spills, ReMatDefs, RegKills, KillOps);
- NextMII = llvm::next(MII);
-
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- if (MI.isIdentityCopy()) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- UpdateKills(*LastStore, TRI, RegKills, KillOps);
- goto ProcessNextInst;
- }
- }
- }
- ProcessNextInst:
- // Delete dead instructions without side effects.
- if (!Erased && !BackTracked && isSafeToDelete(MI)) {
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- }
- if (!Erased)
- DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
- if (!Erased && !BackTracked) {
- for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
- UpdateKills(*II, TRI, RegKills, KillOps);
- }
- MII = NextMII;
- }
-
-}
-
-llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
- switch (RewriterOpt) {
- default: llvm_unreachable("Unreachable!");
- case local:
- return new LocalRewriter();
- case trivial:
- return new TrivialRewriter();
- }
-}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
deleted file mode 100644
index 93474e0d7ff7..000000000000
--- a/lib/CodeGen/VirtRegRewriter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
-#define LLVM_CODEGEN_VIRTREGREWRITER_H
-
-namespace llvm {
- class LiveIntervals;
- class MachineFunction;
- class VirtRegMap;
-
- /// VirtRegRewriter interface: Implementations of this interface assign
- /// spilled virtual registers to stack slots, rewriting the code.
- struct VirtRegRewriter {
- virtual ~VirtRegRewriter();
- virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) = 0;
- };
-
-  /// createVirtRegRewriter - Create and return a rewriter object, as specified
- /// on the command line.
- VirtRegRewriter* createVirtRegRewriter();
-
-}
-
-#endif
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index fdffcb6a77c6..441f1e86dcd8 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugLine.cpp
DWARFFormValue.cpp
)
-
-add_llvm_library_dependencies(LLVMDebugInfo
- LLVMSupport
- )
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index e1ac398b1011..dccadc4ea4da 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -165,3 +165,5 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) {
return DILineInfo(fileName.c_str(), row.Line, row.Column);
}
+
+void DWARFContextInMemory::anchor() { }
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 746a4639f277..d2e763a87a45 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -86,6 +86,7 @@ public:
/// DWARFContext. It assumes all content is available in memory and stores
/// pointers to it.
class DWARFContextInMemory : public DWARFContext {
+ virtual void anchor();
StringRef InfoSection;
StringRef AbbrevSection;
StringRef ARangeSection;
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp
index a11ae3f2908e..6e6c37e30945 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -83,7 +83,7 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
DWARFAbbreviationDeclarationCollMapConstIter pos;
for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) {
- OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first);
+ OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", pos->first);
pos->second.dump(OS);
}
}
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h
index 03189b132127..c7c0436866c4 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.h
+++ b/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -25,21 +25,21 @@ typedef DWARFAbbreviationDeclarationColl::const_iterator
DWARFAbbreviationDeclarationCollConstIter;
class DWARFAbbreviationDeclarationSet {
- uint64_t Offset;
+ uint32_t Offset;
uint32_t IdxOffset;
std::vector<DWARFAbbreviationDeclaration> Decls;
public:
DWARFAbbreviationDeclarationSet()
: Offset(0), IdxOffset(0) {}
- DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset)
+ DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset)
: Offset(offset), IdxOffset(idxOffset) {}
void clear() {
IdxOffset = 0;
Decls.clear();
}
- uint64_t getOffset() const { return Offset; }
+ uint32_t getOffset() const { return Offset; }
void dump(raw_ostream &OS) const;
bool extract(DataExtractor data, uint32_t* offset_ptr);
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index b0c0354383b9..2efbfd1f92fb 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -122,8 +122,9 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
const uint32_t hex_width = Header.AddrSize * 2;
for (DescriptorConstIter pos = ArangeDescriptors.begin(),
end = ArangeDescriptors.end(); pos != end; ++pos)
- OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address)
- << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress());
+ OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
+ << format(" 0x%*.*" PRIx64 ")\n",
+ hex_width, hex_width, pos->getEndAddress());
}
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index 576d37d7813a..178814535612 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -100,13 +100,14 @@ void DWARFDebugAranges::dump(raw_ostream &OS) const {
const uint32_t num_ranges = getNumRanges();
for (uint32_t i = 0; i < num_ranges; ++i) {
const Range &range = Aranges[i];
- OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset,
- (uint64_t)range.LoPC, (uint64_t)range.HiPC());
+ OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC());
}
}
void DWARFDebugAranges::Range::dump(raw_ostream &OS) const {
- OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC());
+ OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ Offset, LoPC, HiPC());
}
void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc,
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 1b089adbe13b..236db97c44af 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -26,7 +26,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
uint32_t offset = Offset;
if (debug_info_data.isValidOffset(offset)) {
- uint64_t abbrCode = debug_info_data.getULEB128(&offset);
+ uint32_t abbrCode = debug_info_data.getULEB128(&offset);
OS << format("\n0x%8.8x: ", Offset);
if (abbrCode) {
@@ -203,8 +203,6 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
AbbrevDecl = NULL;
return true; // NULL debug tag entry
}
-
- return false;
}
bool
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index aff2e8556729..37b3bcdd96e6 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -23,7 +23,7 @@ class DWARFFormValue;
/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data.
class DWARFDebugInfoEntryMinimal {
/// Offset within the .debug_info of the start of this entry.
- uint64_t Offset;
+ uint32_t Offset;
/// How many to subtract from "this" to get the parent.
/// If zero this die has no parent.
@@ -52,7 +52,7 @@ public:
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
bool isNULL() const { return AbbrevDecl == 0; }
- uint64_t getOffset() const { return Offset; }
+ uint32_t getOffset() const { return Offset; }
uint32_t getNumAttributes() const {
return !isNULL() ? AbbrevDecl->getNumAttributes() : 0;
}
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index fe1ef78b026e..117fa31aa86f 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -41,8 +41,9 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
"----------------\n";
for (uint32_t i = 0; i < FileNames.size(); ++i) {
const FileNameEntry& fileEntry = FileNames[i];
- OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx)
- << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length)
+ OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx)
+ << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ",
+ fileEntry.ModTime, fileEntry.Length)
<< fileEntry.Name << '\n';
}
}
@@ -68,7 +69,7 @@ void DWARFDebugLine::Row::reset(bool default_is_stmt) {
}
void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
- OS << format("0x%16.16llx %6u %6u", Address, Line, Column)
+ OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
<< format(" %6u %3u ", File, Isa)
<< (IsStmt ? " is_stmt" : "")
<< (BasicBlock ? " basic_block" : "")
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 705efe5549b6..ee2a3ab7b789 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -263,12 +263,12 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
bool cu_relative_offset = false;
switch (Form) {
- case DW_FORM_addr: OS << format("0x%016x", uvalue); break;
+ case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_flag:
- case DW_FORM_data1: OS << format("0x%02x", uvalue); break;
- case DW_FORM_data2: OS << format("0x%04x", uvalue); break;
- case DW_FORM_data4: OS << format("0x%08x", uvalue); break;
- case DW_FORM_data8: OS << format("0x%016x", uvalue); break;
+ case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break;
+ case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break;
+ case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break;
+ case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_string:
OS << '"';
OS.write_escaped(getAsCString(NULL));
@@ -280,7 +280,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
case DW_FORM_block4:
if (uvalue > 0) {
switch (Form) {
- case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break;
+ case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break;
case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break;
case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break;
case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break;
@@ -314,7 +314,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
break;
}
case DW_FORM_ref_addr:
- OS << format("0x%016x", uvalue);
+ OS << format("0x%016" PRIx64, uvalue);
break;
case DW_FORM_ref1:
cu_relative_offset = true;
@@ -330,11 +330,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
break;
case DW_FORM_ref8:
cu_relative_offset = true;
- OS << format("cu + 0x%8.8llx", uvalue);
+ OS << format("cu + 0x%8.8" PRIx64, uvalue);
break;
case DW_FORM_ref_udata:
cu_relative_offset = true;
- OS << format("cu + 0x%llx", uvalue);
+ OS << format("cu + 0x%" PRIx64, uvalue);
break;
// All DW_FORM_indirect attributes should be resolved prior to calling
@@ -348,7 +348,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
}
if (cu_relative_offset)
- OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0)));
+ OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0));
}
const char*
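
The hunks above replace plain "%llx"/"%x" conversions with the <inttypes.h> PRI* macros, since the values being printed are uint64_t and "long long" is not guaranteed to match that width on every host. A minimal sketch of the pattern (the function and variable names here are illustrative, not from the source):

    #include <cinttypes>
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    // Print a 64-bit DWARF offset portably: PRIx64 expands to the correct
    // printf conversion specifier for uint64_t on the host.
    static void printOffset(llvm::raw_ostream &OS, uint64_t Offset) {
      OS << llvm::format("0x%016" PRIx64, Offset);
    }

The narrower forms (data1/data2/data4, block1/block2/block4) are instead cast down to the matching fixed-width type so the "%02x"/"%04x"/"%08x" specifiers stay correct.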
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..210b9f9d356e
--- /dev/null
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/DebugInfo/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DebugInfo
+parent = Libraries
+required_libraries = Support
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index fb14d41e91d2..cb11bfe93c7c 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -1,17 +1,20 @@
+
+
add_llvm_library(LLVMExecutionEngine
ExecutionEngine.cpp
ExecutionEngineBindings.cpp
TargetSelect.cpp
)
-add_llvm_library_dependencies(LLVMExecutionEngine
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
add_subdirectory(RuntimeDyld)
+
+if( LLVM_USE_OPROFILE )
+ add_subdirectory(OProfileJIT)
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+ add_subdirectory(IntelJITEvents)
+endif( LLVM_USE_INTEL_JITEVENTS )
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
new file mode 100644
index 000000000000..1c07c947142f
--- /dev/null
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -0,0 +1,67 @@
+//===-- EventListenerCommon.h - Common JITEventListener code ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for JITEventListener implementations
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EVENT_LISTENER_COMMON_H
+#define EVENT_LISTENER_COMMON_H
+
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace jitprofiling {
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile. Use an AssertingVH rather than a ValueMap because we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+ DenseMap<AssertingVH<MDNode>, std::string> Paths;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ DIScope DIScope(Scope);
+ Filename = DIScope.getFilename();
+ }
+ return Filename.c_str();
+ }
+
+ const char *getFullPath(MDNode *Scope) {
+ std::string &P = Paths[Scope];
+ if (P.empty()) {
+ DIScope DIScope(Scope);
+ StringRef DirName = DIScope.getDirectory();
+ StringRef FileName = DIScope.getFilename();
+ SmallString<256> FullPath;
+ if (DirName != "." && DirName != "") {
+ FullPath = DirName;
+ }
+ if (FileName != "") {
+ sys::path::append(FullPath, FileName);
+ }
+ P = FullPath.str();
+ }
+ return P.c_str();
+ }
+};
+
+} // namespace jitprofiling
+
+} // namespace llvm
+
+#endif //EVENT_LISTENER_COMMON_H
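
FilenameCache memoizes one string per scope MDNode so a listener can hand stable, null-terminated paths to a C profiling API without rebuilding them on every event. A short usage sketch, assuming a DebugLoc Loc and an LLVMContext Ctx are in scope (the real plumbing is in IntelJITEventListener.cpp below):

    llvm::jitprofiling::FilenameCache Filenames;
    llvm::MDNode *Scope = Loc.getScope(Ctx);          // scope of the debug location
    const char *Path = Filenames.getFullPath(Scope);  // cached after the first call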
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 525877b68900..a744d0c1e798 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
@@ -308,13 +307,12 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
// Should be an array of '{ i32, void ()* }' structs. The first value is
// the init priority, which we ignore.
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0)
return;
- ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (CS == 0) continue;
Constant *FP = CS->getOperand(1);
if (FP->isNullValue())
@@ -404,14 +402,15 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
std::string *ErrorStr,
CodeGenOpt::Level OptLevel,
bool GVsWithCode) {
- return EngineBuilder(M)
+ EngineBuilder EB = EngineBuilder(M)
.setEngineKind(ForceInterpreter
? EngineKind::Interpreter
: EngineKind::JIT)
.setErrorStr(ErrorStr)
.setOptLevel(OptLevel)
- .setAllocateGVsWithCode(GVsWithCode)
- .create();
+ .setAllocateGVsWithCode(GVsWithCode);
+
+ return EB.create();
}
/// createJIT - This is the factory method for creating a JIT for the current
@@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
ExecutionEngine *ExecutionEngine::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
+ CodeGenOpt::Level OL,
bool GVsWithCode,
Reloc::Model RM,
CodeModel::Model CMM) {
@@ -432,18 +431,25 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
// Use the defaults for extra parameters. Users can use EngineBuilder to
// set them.
- StringRef MArch = "";
- StringRef MCPU = "";
- SmallVector<std::string, 1> MAttrs;
-
- TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr);
+ EngineBuilder EB(M);
+ EB.setEngineKind(EngineKind::JIT);
+ EB.setErrorStr(ErrorStr);
+ EB.setRelocationModel(RM);
+ EB.setCodeModel(CMM);
+ EB.setAllocateGVsWithCode(GVsWithCode);
+ EB.setOptLevel(OL);
+ EB.setJITMemoryManager(JMM);
+
+ // TODO: permit custom TargetOptions here
+ TargetMachine *TM = EB.selectTarget();
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
- return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
-ExecutionEngine *EngineBuilder::create() {
+ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
+ OwningPtr<TargetMachine> TheTM(TM); // Take ownership.
+
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -464,21 +470,24 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
- if (WhichEngine & EngineKind::JIT) {
- if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs,
- RelocModel, CMModel,
- ErrorStr)) {
- if (UseMCJIT && ExecutionEngine::MCJITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- } else if (ExecutionEngine::JITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- }
+ if ((WhichEngine & EngineKind::JIT) && TheTM) {
+ Triple TT(M->getTargetTriple());
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
+
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
}
}
@@ -944,30 +953,47 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
DEBUG(Init->dump());
- if (isa<UndefValue>(Init)) {
+ if (isa<UndefValue>(Init))
return;
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
unsigned ElementSize =
getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
return;
- } else if (isa<ConstantAggregateZero>(Init)) {
+ }
+
+ if (isa<ConstantAggregateZero>(Init)) {
memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
return;
- } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ }
+
+ if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
unsigned ElementSize =
getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
return;
- } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ }
+
+ if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
const StructLayout *SL =
getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
return;
- } else if (Init->getType()->isFirstClassType()) {
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Init)) {
+ // CDS is already laid out in host memory order.
+ StringRef Data = CDS->getRawDataValues();
+ memcpy(Addr, Data.data(), Data.size());
+ return;
+ }
+
+ if (Init->getType()->isFirstClassType()) {
GenericValue Val = getConstantValue(Init);
StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
return;
@@ -1123,6 +1149,6 @@ void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
const GlobalValue *,
const GlobalValue *) {
- assert(false && "The ExecutionEngine doesn't know how to handle a"
- " RAUW on a value it has a global mapping for.");
+ llvm_unreachable("The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
}
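
With this change the old createJIT() entry point just packages its arguments into an EngineBuilder, and create() now accepts an externally constructed TargetMachine that it takes ownership of. A minimal client-side sketch of the builder path (module and error-string names are illustrative):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"

    // Sketch: build a JIT the way createJIT() now does internally.
    // Returns null and fills Err on failure.
    llvm::ExecutionEngine *makeJIT(llvm::Module *M, std::string &Err) {
      return llvm::EngineBuilder(M)
                 .setEngineKind(llvm::EngineKind::JIT)
                 .setErrorStr(&Err)
                 .setOptLevel(llvm::CodeGenOpt::Default)
                 .create();
    }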
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index f8f1f4a78ee5..75e680ab3612 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -76,9 +76,7 @@ double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
return unwrap(GenVal)->DoubleVal;
default:
llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
- break;
}
- return 0; // Not reached
}
void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
new file mode 100644
index 000000000000..7d67d0d8bee1
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+set(system_libs
+ ${system_libs}
+ jitprofiling
+ )
+
+add_llvm_library(LLVMIntelJITEvents
+ IntelJITEventListener.cpp
+ )
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
new file mode 100644
index 000000000000..5dfa78f34a33
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -0,0 +1,183 @@
+//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object to tell Intel(R) VTune(TM)
+// Amplifier XE 2011 about JITted functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/ValueHandle.h"
+#include "EventListenerCommon.h"
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class IntelJITEventListener : public JITEventListener {
+ typedef DenseMap<void*, unsigned int> MethodIDMap;
+
+ IntelJITEventsWrapper& Wrapper;
+ MethodIDMap MethodIDs;
+ FilenameCache Filenames;
+
+public:
+ IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
+ : Wrapper(libraryWrapper) {
+ }
+
+ ~IntelJITEventListener() {
+ }
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+static LineNumberInfo LineStartToIntelJITFormat(
+ uintptr_t StartAddress,
+ uintptr_t Address,
+ DebugLoc Loc) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Loc.getLine();
+
+ return Result;
+}
+
+static iJIT_Method_Load FunctionDescToIntelJITFormat(
+ IntelJITEventsWrapper& Wrapper,
+ const char* FnName,
+ uintptr_t FnStart,
+ size_t FnSize) {
+ iJIT_Method_Load Result;
+ memset(&Result, 0, sizeof(iJIT_Method_Load));
+
+ Result.method_id = Wrapper.iJIT_GetNewMethodID();
+ Result.method_name = const_cast<char*>(FnName);
+ Result.method_load_address = reinterpret_cast<void*>(FnStart);
+ Result.method_size = FnSize;
+
+ Result.class_id = 0;
+ Result.class_file_name = NULL;
+ Result.user_data = NULL;
+ Result.user_data_size = 0;
+ Result.env = iJDE_JittingAPI;
+
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void IntelJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+ F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnSize);
+
+ std::vector<LineNumberInfo> LineInfo;
+
+ if (!Details.LineStarts.empty()) {
+ // Now convert the line number information from the address/DebugLoc
+ // format in Details to the offset/lineno in Intel JIT API format.
+
+ LineInfo.reserve(Details.LineStarts.size() + 1);
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(FirstLocScope));
+
+ LineNumberInfo FirstLine;
+ FirstLine.Offset = 0;
+ FirstLine.LineNumber = FunctionDI.getLineNumber();
+ LineInfo.push_back(FirstLine);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
+ Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ // This implementation ignores the DebugLoc filename because the Intel
+ // JIT API does not support multiple source files associated with a single
+ // JIT function
+ LineInfo.push_back(LineStartToIntelJITFormat(
+ reinterpret_cast<uintptr_t>(FnStart),
+ I->Address,
+ I->Loc));
+
+ // If we have no file name yet for the function, use the filename from
+ // the first instruction that has one
+ if (FunctionMessage.source_file_name == 0) {
+ MDNode *scope = I->Loc.getScope(
+ Details.MF->getFunction()->getContext());
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(scope));
+ }
+ }
+
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ } else {
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[FnStart] = FunctionMessage.method_id;
+}
+
+void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ MethodIDMap::iterator I = MethodIDs.find(FnStart);
+ if (I != MethodIDs.end()) {
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ MethodIDs.erase(I);
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createIntelJITEventListener() {
+ static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
+ new IntelJITEventsWrapper);
+ return new IntelJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createIntelJITEventListener(
+ IntelJITEventsWrapper* TestImpl) {
+ return new IntelJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
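
The listener does nothing until a client attaches it to its engine; a hedged sketch follows (RegisterJITEventListener is the usual ExecutionEngine hook, and this assumes LLVM was configured with the Intel JIT events support added in this commit):

    // Sketch: attach the VTune listener to an existing ExecutionEngine *EE.
    llvm::JITEventListener *Listener =
        llvm::JITEventListener::createIntelJITEventListener();
    EE->RegisterJITEventListener(Listener);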
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
new file mode 100644
index 000000000000..80d227326441
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt ------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = IntelJITEvents
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile
new file mode 100644
index 000000000000..ba75ac6f6462
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/Makefile
@@ -0,0 +1,17 @@
+##===- lib/ExecutionEngine/IntelJITEvents/Makefile ---------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIntelJITEvents
+
+include $(LEVEL)/Makefile.config
+
+SOURCES := IntelJITEventListener.cpp
+CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 4fb58c2e3783..d331f830b62e 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -12,14 +12,6 @@ add_llvm_library(LLVMInterpreter
Interpreter.cpp
)
-add_llvm_library_dependencies(LLVMInterpreter
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMSupport
- LLVMTarget
- )
-
if( LLVM_ENABLE_FFI )
target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
endif()
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 27917da07a2c..af47be9c5b56 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -625,24 +625,6 @@ void Interpreter::visitReturnInst(ReturnInst &I) {
popStackAndReturnValueToCaller(RetTy, Result);
}
-void Interpreter::visitUnwindInst(UnwindInst &I) {
- // Unwind stack
- Instruction *Inst;
- do {
- ECStack.pop_back();
- if (ECStack.empty())
- report_fatal_error("Empty stack during unwind!");
- Inst = ECStack.back().Caller.getInstruction();
- } while (!(Inst && isa<InvokeInst>(Inst)));
-
- // Return from invoke
- ExecutionContext &InvokingSF = ECStack.back();
- InvokingSF.Caller = CallSite();
-
- // Go to exceptional destination BB of invoke instruction
- SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
-}
-
void Interpreter::visitUnreachableInst(UnreachableInst &I) {
report_fatal_error("Program executed an 'unreachable' instruction!");
}
@@ -668,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
// Check to see if any of the cases match...
BasicBlock *Dest = 0;
- unsigned NumCases = I.getNumCases();
- // Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF);
+ for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
+ GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(I.getSuccessor(i));
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
break;
}
}
@@ -1253,8 +1233,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
break;
default:
dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
- llvm_unreachable(0);
- return GenericValue();
+ llvm_unreachable("Unhandled ConstantExpr");
}
return Dest;
}
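
The switch-handling hunk above drops the old index-based getCaseValue(i)/getSuccessor(i) walk in favour of the new SwitchInst::CaseIt iterator. A minimal sketch of the new pattern (the SI pointer is illustrative):

    // Sketch: visit every non-default case of a SwitchInst *SI.
    for (llvm::SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
         i != e; ++i) {
      llvm::ConstantInt *Val = i.getCaseValue();      // case constant
      llvm::BasicBlock *Dest = i.getCaseSuccessor();  // branch target
      (void)Val; (void)Dest;
    }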
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 055875c9456a..7a206ebf73d7 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -94,15 +94,16 @@ static ExFunc lookupFunction(const Function *F) {
FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
- ExtName + "_" + F->getNameStr();
+ ExtName += "_" + F->getName().str();
sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = FuncNames[ExtName];
if (FnPtr == 0)
- FnPtr = FuncNames["lle_X_" + F->getNameStr()];
+ FnPtr = FuncNames["lle_X_" + F->getName().str()];
if (FnPtr == 0) // Try calling a generic function... if it exists...
FnPtr = (ExFunc)(intptr_t)
- sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr());
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
+ F->getName().str());
if (FnPtr != 0)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
@@ -296,14 +297,8 @@ GenericValue Interpreter::callExternalFunction(Function *F,
// Functions "exported" to the running application...
//
-// Visual Studio warns about returning GenericValue in extern "C" linkage
-#ifdef _MSC_VER
- #pragma warning(disable : 4190)
-#endif
-
-extern "C" { // Don't add C++ manglings to llvm mangling :)
-
// void atexit(Function*)
+static
GenericValue lle_X_atexit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() == 1);
@@ -314,6 +309,7 @@ GenericValue lle_X_atexit(FunctionType *FT,
}
// void exit(int)
+static
GenericValue lle_X_exit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
TheInterpreter->exitCalled(Args[0]);
@@ -321,6 +317,7 @@ GenericValue lle_X_exit(FunctionType *FT,
}
// void abort(void)
+static
GenericValue lle_X_abort(FunctionType *FT,
const std::vector<GenericValue> &Args) {
//FIXME: should we report or raise here?
@@ -331,6 +328,7 @@ GenericValue lle_X_abort(FunctionType *FT,
// int sprintf(char *, const char *, ...) - a very rough implementation to make
// output useful.
+static
GenericValue lle_X_sprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char *OutputBuffer = (char *)GVTOP(Args[0]);
@@ -408,11 +406,11 @@ GenericValue lle_X_sprintf(FunctionType *FT,
break;
}
}
- return GV;
}
// int printf(const char *, ...) - a very rough implementation to make output
// useful.
+static
GenericValue lle_X_printf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char Buffer[10000];
@@ -425,6 +423,7 @@ GenericValue lle_X_printf(FunctionType *FT,
}
// int sscanf(const char *format, ...);
+static
GenericValue lle_X_sscanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
@@ -440,6 +439,7 @@ GenericValue lle_X_sscanf(FunctionType *FT,
}
// int scanf(const char *format, ...);
+static
GenericValue lle_X_scanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
@@ -456,6 +456,7 @@ GenericValue lle_X_scanf(FunctionType *FT,
// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
// output useful.
+static
GenericValue lle_X_fprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() >= 2);
@@ -469,14 +470,6 @@ GenericValue lle_X_fprintf(FunctionType *FT,
return GV;
}
-} // End extern "C"
-
-// Done with externals; turn the warning back on
-#ifdef _MSC_VER
- #pragma warning(default: 4190)
-#endif
-
-
void Interpreter::initializeExternalFunctions() {
sys::ScopedLock Writer(*FunctionsLock);
FuncNames["lle_X_atexit"] = lle_X_atexit;
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index ee2b4596f38f..28c5775ab468 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -115,6 +115,12 @@ public:
virtual GenericValue runFunction(Function *F,
const std::vector<GenericValue> &ArgValues);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ // FIXME: not implemented.
+ return 0;
+ }
+
/// recompileAndRelinkFunction - For the interpreter, functions are always
/// up-to-date.
///
@@ -165,7 +171,6 @@ public:
void visitCallSite(CallSite CS);
void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
- void visitUnwindInst(UnwindInst &I);
void visitUnreachableInst(UnreachableInst &I);
void visitShl(BinaryOperator &I);
diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
new file mode 100644
index 000000000000..327b320afe2b
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/Interpreter/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Interpreter
+parent = ExecutionEngine
+required_libraries = CodeGen Core ExecutionEngine Support Target
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index 598e50e79460..52bb38970db9 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -2,19 +2,8 @@
add_definitions(-DENABLE_X86_JIT)
add_llvm_library(LLVMJIT
- Intercept.cpp
JIT.cpp
- JITDebugRegisterer.cpp
JITDwarfEmitter.cpp
JITEmitter.cpp
JITMemoryManager.cpp
- OProfileJITEventListener.cpp
- )
-
-add_llvm_library_dependencies(LLVMJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
)
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
deleted file mode 100644
index 2251a8e6b077..000000000000
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *JIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index d773009065b5..a942299f3bbd 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetJITInfo.h"
@@ -206,7 +207,6 @@ void DarwinRegisterFrame(void* FrameBegin) {
ExecutionEngine *JIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -216,7 +216,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
} else {
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -268,9 +268,10 @@ extern "C" {
}
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
- isAlreadyCodeGenerating(false) {
+ JITMemoryManager *jmm, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji),
+ JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()),
+ AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
jitstate = new JITState(M);
@@ -288,7 +289,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -323,6 +324,7 @@ JIT::~JIT() {
AllJits->Remove(this);
delete jitstate;
delete JCE;
+ // JMM is owned by JCE, so we do not need to delete JMM here.
delete &TM;
}
@@ -341,7 +343,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -372,7 +374,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -476,7 +478,6 @@ GenericValue JIT::runFunction(Function *F,
case Type::FP128TyID:
case Type::PPC_FP128TyID:
llvm_unreachable("long double not supported yet");
- return rv;
case Type::PointerTyID:
return PTOGV(((void*(*)())(intptr_t)FPtr)());
}
@@ -708,12 +709,32 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
if (I != getBasicBlockAddressMap(locked).end()) {
return I->second;
} else {
- assert(0 && "JIT does not have BB address for address-of-label, was"
- " it eliminated by optimizer?");
- return 0;
+ llvm_unreachable("JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
}
}
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure){
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = JMM->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
/// Emitter.
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 92dcb0e99586..2ae155bebf40 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -58,6 +58,7 @@ class JIT : public ExecutionEngine {
TargetMachine &TM; // The current target we are compiling to
TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
JITCodeEmitter *JCE; // JCE object
+ JITMemoryManager *JMM;
std::vector<JITEventListener*> EventListeners;
/// AllocateGVsWithCode - Some applications require that global variables and
@@ -78,8 +79,7 @@ class JIT : public ExecutionEngine {
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
public:
~JIT();
@@ -118,15 +118,15 @@ public:
const std::vector<GenericValue> &ArgValues);
/// getPointerToNamedFunction - This method returns the address of the
- /// specified function by using the dlsym function call. As such it is only
+ /// specified function by using the MemoryManager. As such it is only
/// useful for resolving library symbols, not code generated symbols.
///
/// If AbortOnFailure is false and no function with the given name is
/// found, this function silently returns a null pointer. Otherwise,
/// it prints a message to stderr and aborts.
///
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
// CompilationCallback - Invoked the first time that a call site is found,
// which causes lazy compilation of the target function.
@@ -185,7 +185,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
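
For illustration only (not part of the imported patch): with CodeGenOpt::Level
dropped from createJIT() above, client code keeps choosing the optimization
level through EngineBuilder, and the choice is baked into the TargetMachine
instead of being threaded through the JIT constructor. A minimal sketch,
assuming the usual 3.1-era EngineBuilder API:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"          // forces the JIT component to be linked in
#include "llvm/Module.h"
#include "llvm/Support/TargetSelect.h"
#include <string>

llvm::ExecutionEngine *makeJIT(llvm::Module *M, std::string &Err) {
  llvm::InitializeNativeTarget();
  return llvm::EngineBuilder(M)
      .setErrorStr(&Err)
      .setEngineKind(llvm::EngineKind::JIT)
      .setOptLevel(llvm::CodeGenOpt::Default)  // consumed when the TargetMachine is created
      .create();
}
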
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
deleted file mode 100644
index e71c20b89fda..000000000000
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITDebugRegisterer object that is used by the JIT to
-// register debug info with debuggers like GDB.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JITDebugRegisterer.h"
-#include "../../CodeGen/ELF.h"
-#include "../../CodeGen/ELFWriter.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-#include <string>
-
-namespace llvm {
-
-// This must be kept in sync with gdb/gdb/jit.h .
-extern "C" {
-
- // Debuggers puts a breakpoint in this function.
- LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
-
- // We put information about the JITed function in this global, which the
- // debugger reads. Make sure to specify the version statically, because the
- // debugger checks the version before we can set it during runtime.
- struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
-
-}
-
-namespace {
-
- /// JITDebugLock - Used to serialize all code registration events, since they
- /// modify global variables.
- sys::Mutex JITDebugLock;
-
-}
-
-JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
-
-JITDebugRegisterer::~JITDebugRegisterer() {
- // Free all ELF memory.
- for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
- I != E; ++I) {
- // Call the private method that doesn't update the map so our iterator
- // doesn't break.
- UnregisterFunctionInternal(I);
- }
- FnMap.clear();
-}
-
-std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
- // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
- // API. We don't use the real module because then the ELFWriter would write
- // out unnecessary GlobalValues during finalization.
- LLVMContext Context;
- Module M("", Context);
-
- // Make a buffer for the ELF in memory.
- std::string Buffer;
- raw_string_ostream O(Buffer);
- ELFWriter EW(O, TM);
- EW.doInitialization(M);
-
- // Copy the binary into the .text section. This isn't necessary, but it's
- // useful to be able to disassemble the ELF by hand.
- ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
- Text.Addr = (uint64_t)I.FnStart;
- // TODO: We could eliminate this copy if we somehow used a pointer/size pair
- // instead of a vector.
- Text.getData().assign(I.FnStart, I.FnEnd);
-
- // Copy the exception handling call frame information into the .eh_frame
- // section. This allows GDB to get a good stack trace, particularly on
- // linux x86_64. Mark this as a PROGBITS section that needs to be loaded
- // into memory at runtime.
- ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
- // Pointers in the DWARF EH info are all relative to the EH frame start,
- // which is stored here.
- EH.Addr = (uint64_t)I.EhStart;
- // TODO: We could eliminate this copy if we somehow used a pointer/size pair
- // instead of a vector.
- EH.getData().assign(I.EhStart, I.EhEnd);
-
- // Add this single function to the symbol table, so the debugger prints the
- // name instead of '???'. We give the symbol default global visibility.
- ELFSym *FnSym = ELFSym::getGV(F,
- ELF::STB_GLOBAL,
- ELF::STT_FUNC,
- ELF::STV_DEFAULT);
- FnSym->SectionIdx = Text.SectionIdx;
- FnSym->Size = I.FnEnd - I.FnStart;
- FnSym->Value = 0; // Offset from start of section.
- EW.SymbolList.push_back(FnSym);
-
- EW.doFinalization(M);
- O.flush();
-
- // When trying to debug why GDB isn't getting the debug info right, it's
- // awfully helpful to write the object file to disk so that it can be
- // inspected with readelf and objdump.
- if (JITEmitDebugInfoToDisk) {
- std::string Filename;
- raw_string_ostream O2(Filename);
- O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
- O2.flush();
- std::string Errors;
- raw_fd_ostream O3(Filename.c_str(), Errors);
- O3 << Buffer;
- O3.close();
- }
-
- return Buffer;
-}
-
-void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
- // TODO: Support non-ELF platforms.
- if (!TM.getELFWriterInfo())
- return;
-
- std::string Buffer = MakeELF(F, I);
-
- jit_code_entry *JITCodeEntry = new jit_code_entry();
- JITCodeEntry->symfile_addr = Buffer.c_str();
- JITCodeEntry->symfile_size = Buffer.size();
-
- // Add a mapping from F to the entry and buffer, so we can delete this
- // info later.
- FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
-
- // Acquire the lock and do the registration.
- {
- MutexGuard locked(JITDebugLock);
- __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
-
- // Insert this entry at the head of the list.
- JITCodeEntry->prev_entry = NULL;
- jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
- JITCodeEntry->next_entry = NextEntry;
- if (NextEntry != NULL) {
- NextEntry->prev_entry = JITCodeEntry;
- }
- __jit_debug_descriptor.first_entry = JITCodeEntry;
- __jit_debug_descriptor.relevant_entry = JITCodeEntry;
- __jit_debug_register_code();
- }
-}
-
-void JITDebugRegisterer::UnregisterFunctionInternal(
- RegisteredFunctionsMap::iterator I) {
- jit_code_entry *&JITCodeEntry = I->second.second;
-
- // Acquire the lock and do the unregistration.
- {
- MutexGuard locked(JITDebugLock);
- __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
-
- // Remove the jit_code_entry from the linked list.
- jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
- jit_code_entry *NextEntry = JITCodeEntry->next_entry;
- if (NextEntry) {
- NextEntry->prev_entry = PrevEntry;
- }
- if (PrevEntry) {
- PrevEntry->next_entry = NextEntry;
- } else {
- assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
- __jit_debug_descriptor.first_entry = NextEntry;
- }
-
- // Tell GDB which entry we removed, and unregister the code.
- __jit_debug_descriptor.relevant_entry = JITCodeEntry;
- __jit_debug_register_code();
- }
-
- delete JITCodeEntry;
- JITCodeEntry = NULL;
-
- // Free the ELF file in memory.
- std::string &Buffer = I->second.first;
- Buffer.clear();
-}
-
-void JITDebugRegisterer::UnregisterFunction(const Function *F) {
- // TODO: Support non-ELF platforms.
- if (!TM.getELFWriterInfo())
- return;
-
- RegisteredFunctionsMap::iterator I = FnMap.find(F);
- if (I == FnMap.end()) return;
- UnregisterFunctionInternal(I);
- FnMap.erase(I);
-}
-
-} // end namespace llvm
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
deleted file mode 100644
index dce506bbfefd..000000000000
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITDebugRegisterer object that is used by the JIT to
-// register debug info with debuggers like GDB.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
-#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/DataTypes.h"
-#include <string>
-
-// This must be kept in sync with gdb/gdb/jit.h .
-extern "C" {
-
- typedef enum {
- JIT_NOACTION = 0,
- JIT_REGISTER_FN,
- JIT_UNREGISTER_FN
- } jit_actions_t;
-
- struct jit_code_entry {
- struct jit_code_entry *next_entry;
- struct jit_code_entry *prev_entry;
- const char *symfile_addr;
- uint64_t symfile_size;
- };
-
- struct jit_descriptor {
- uint32_t version;
- // This should be jit_actions_t, but we want to be specific about the
- // bit-width.
- uint32_t action_flag;
- struct jit_code_entry *relevant_entry;
- struct jit_code_entry *first_entry;
- };
-
-}
-
-namespace llvm {
-
-class ELFSection;
-class Function;
-class TargetMachine;
-
-
-/// This class encapsulates information we want to send to the debugger.
-///
-struct DebugInfo {
- uint8_t *FnStart;
- uint8_t *FnEnd;
- uint8_t *EhStart;
- uint8_t *EhEnd;
-
- DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
-};
-
-typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
- RegisteredFunctionsMap;
-
-/// This class registers debug info for JITed code with an attached debugger.
-/// Without proper debug info, GDB can't do things like source level debugging
-/// or even produce a proper stack trace on linux-x86_64. To use this class,
-/// whenever a function is JITed, create a DebugInfo struct and pass it to the
-/// RegisterFunction method. The method will then do whatever is necessary to
-/// inform the debugger about the JITed function.
-class JITDebugRegisterer {
-
- TargetMachine &TM;
-
- /// FnMap - A map of functions that have been registered to the associated
- /// temporary files. Used for cleanup.
- RegisteredFunctionsMap FnMap;
-
- /// MakeELF - Builds the ELF file in memory and returns a std::string that
- /// contains the ELF.
- std::string MakeELF(const Function *F, DebugInfo &I);
-
-public:
- JITDebugRegisterer(TargetMachine &tm);
-
- /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
- ///
- ~JITDebugRegisterer();
-
- /// RegisterFunction - Register debug info for the given function with an
- /// attached debugger. Clients must call UnregisterFunction on all
- /// registered functions before deleting them to free the associated symbol
- /// file and unregister it from the debugger.
- void RegisterFunction(const Function *F, DebugInfo &I);
-
- /// UnregisterFunction - Unregister the debug info for the given function
- /// from the debugger and free associated memory.
- void UnregisterFunction(const Function *F);
-
-private:
- /// UnregisterFunctionInternal - Unregister the debug info for the given
- /// function from the debugger and delete any temporary files. The private
- /// version of this method does not remove the function from FnMap so that it
- /// can be called while iterating over FnMap.
- void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
-
-};
-
-} // end namespace llvm
-
-#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 8f84ac7b4126..42a136e72d45 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
+ MayThrow |= MI->isCall();
continue;
}
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 24020ee6d689..504c8bdffd1b 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "jit"
#include "JIT.h"
-#include "JITDebugRegisterer.h"
#include "JITDwarfEmitter.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Constants.h"
@@ -77,8 +76,8 @@ namespace {
struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
typedef JITResolverState *ExtraData;
static void onRAUW(JITResolverState *, Value *Old, Value *New) {
- assert(false && "The JIT doesn't know how to handle a"
- " RAUW on a value it has emitted.");
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
}
};
@@ -324,9 +323,6 @@ namespace {
/// DE - The dwarf emitter for the jit.
OwningPtr<JITDwarfEmitter> DE;
- /// DR - The debug registerer for the jit.
- OwningPtr<JITDebugRegisterer> DR;
-
/// LabelLocations - This vector is a mapping from Label ID's to their
/// address.
DenseMap<MCSymbol*, uintptr_t> LabelLocations;
@@ -362,22 +358,22 @@ namespace {
/// Instance of the JIT
JIT *TheJIT;
+ bool JITExceptionHandling;
+
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), TheJIT(&jit) {
+ EmittedFunctions(this), TheJIT(&jit),
+ JITExceptionHandling(TM.Options.JITExceptionHandling) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
DEBUG(dbgs() << "JIT is managing a GOT\n");
}
- if (JITExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling) {
DE.reset(new JITDwarfEmitter(jit));
}
- if (JITEmitDebugInfo) {
- DR.reset(new JITDebugRegisterer(TM));
- }
}
~JITEmitter() {
delete MemMgr;
@@ -968,7 +964,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
}
});
- if (JITExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling) {
uintptr_t ActualSize = 0;
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
@@ -983,7 +979,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
EhStart);
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
- uint8_t *EhEnd = CurBufferPtr;
BufferBegin = SavedBufferBegin;
BufferEnd = SavedBufferEnd;
CurBufferPtr = SavedCurBufferPtr;
@@ -991,15 +986,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (JITExceptionHandling) {
TheJIT->RegisterTable(F.getFunction(), FrameRegister);
}
-
- if (JITEmitDebugInfo) {
- DebugInfo I;
- I.FnStart = FnStart;
- I.FnEnd = FnEnd;
- I.EhStart = EhStart;
- I.EhEnd = EhEnd;
- DR->RegisterFunction(F.getFunction(), I);
- }
}
if (MMI)
@@ -1037,17 +1023,13 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
EmittedFunctions.erase(Emitted);
}
- if(JITExceptionHandling) {
+ if (JITExceptionHandling) {
TheJIT->DeregisterTable(F);
}
-
- if (JITEmitDebugInfo) {
- DR->UnregisterFunction(F);
- }
}
-void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
if (BufferBegin)
return JITCodeEmitter::allocateSpace(Size, Alignment);
@@ -1059,7 +1041,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
return CurBufferPtr;
}
-void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
// Delegate this call through the memory manager.
return MemMgr->allocateGlobal(Size, Alignment);
}
@@ -1179,6 +1161,9 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
}
break;
}
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ llvm_unreachable(
+ "JT Info emission not implemented for GPRel64BlockAddress yet.");
}
}
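
For illustration (not part of the imported patch): JITExceptionHandling is now
read from the TargetMachine's TargetOptions rather than from a global flag, so
a client that wants EH tables for JITed code requests it per engine. A minimal
sketch, assuming the 3.1-era EngineBuilder::setTargetOptions() hook:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"

llvm::ExecutionEngine *makeJITWithEH(llvm::Module *M) {
  llvm::InitializeNativeTarget();
  llvm::TargetOptions Opts;
  Opts.JITExceptionHandling = true;   // picked up by JITEmitter via TM.Options
  return llvm::EngineBuilder(M)
      .setEngineKind(llvm::EngineKind::JIT)
      .setTargetOptions(Opts)
      .create();
}
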
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index eec23cec0af9..2d1775c05c10 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -23,10 +23,22 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
#include <vector>
#include <cassert>
#include <climits>
#include <cstring>
+
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
using namespace llvm;
STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
@@ -314,6 +326,11 @@ namespace {
/// should allocate a separate slab.
static const size_t DefaultSizeThreshold;
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call.
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
void AllocateGOT();
// Testing methods.
@@ -441,6 +458,50 @@ namespace {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
+ /// allocateCodeSection - Allocate memory for a code section.
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ // FIXME: Alignment handling.
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block.
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)Size);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size);
+ return (uint8_t *)(CurBlock + 1);
+ }
+
+ /// allocateDataSection - Allocate memory for a data section.
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
@@ -713,6 +774,139 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
return true;
}
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+//===----------------------------------------------------------------------===//
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+ // host tool's (e.g. tools/lli) copy, invoking the wrong, duplicated ctors
+ // (and registering the wrong dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
+ // called before ExecutionEngine::runFunctionAsMain().
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
+
JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
return new DefaultJITMemoryManager();
}
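
For illustration (not part of the imported patch): the new allocateCodeSection()
above picks its block by walking the circular free list once and keeping the
largest entry, allocating a fresh slab only when even that block is too small.
A minimal standalone sketch of that selection policy:

#include <cstddef>

struct FreeBlock {
  std::size_t Size;
  FreeBlock *Next;          // circular singly linked list
};

// Returns the largest block reachable from Head, or 0 when the caller should
// fall back to allocating a new slab.
static FreeBlock *pickLargest(FreeBlock *Head, std::size_t Want) {
  FreeBlock *Best = Head;
  for (FreeBlock *I = Head->Next; I != Head; I = I->Next)
    if (I->Size > Best->Size)
      Best = I;
  return Best->Size >= Want ? Best : 0;
}
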
diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt
new file mode 100644
index 000000000000..ca2a56537aab
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = JIT
+parent = ExecutionEngine
+required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
new file mode 100644
index 000000000000..1f94a4fb9ecd
--- /dev/null
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/ExecutionEngine/LLVMBuild.txt ----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT
+
+[component_0]
+type = Library
+name = ExecutionEngine
+parent = Libraries
+required_libraries = Core MC Support Target
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index aae8a1b2c521..fef71768b493 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,12 +1,4 @@
add_llvm_library(LLVMMCJIT
MCJIT.cpp
- Intercept.cpp
- )
-
-add_llvm_library_dependencies(LLVMMCJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
+ MCJITMemoryManager.cpp
)
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
deleted file mode 100644
index f83f4282e016..000000000000
--- a/lib/ExecutionEngine/MCJIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCJIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *MCJIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
new file mode 100644
index 000000000000..90f4d2f75e24
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/MCJIT/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCJIT
+parent = ExecutionEngine
+required_libraries = Core ExecutionEngine RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 7c8a740dc862..44f89cf78309 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() {
ExecutionEngine *MCJIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo())
- return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
- GVsWithCode);
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode);
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
}
MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode)
+ RTDyldMemoryManager *MM, bool AllocateGVsWithCode)
: ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
setTargetData(TM->getTargetData());
@@ -64,7 +61,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) {
+ if (TM->addPassesToEmitMC(PM, Ctx, OS, false)) {
report_fatal_error("Target does not support MC emission!");
}
@@ -77,9 +74,9 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
OS.flush();
// Load the object into the dynamic linker.
- // FIXME: It would be nice to avoid making yet another copy.
- MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
- Buffer.size()));
+ MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(),
+ Buffer.size()),
+ "", false);
if (Dyld.loadObject(MB))
report_fatal_error(Dyld.getErrorString());
// Resolve any relocations.
@@ -88,11 +85,11 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
MCJIT::~MCJIT() {
delete MemMgr;
+ delete TM;
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
report_fatal_error("not yet implemented");
- return 0;
}
void *MCJIT::getPointerToFunction(Function *F) {
@@ -211,12 +208,30 @@ GenericValue MCJIT::runFunction(Function *F,
case Type::FP128TyID:
case Type::PPC_FP128TyID:
llvm_unreachable("long double not supported yet");
- return rv;
case Type::PointerTyID:
return PTOGV(((void*(*)())(intptr_t)FPtr)());
}
}
- assert(0 && "Full-featured argument passing not supported yet!");
- return GenericValue();
+ llvm_unreachable("Full-featured argument passing not supported yet!");
+}
+
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure){
+ if (!isSymbolSearchingDisabled() && MemMgr) {
+ void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
}
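
For illustration (not part of the imported patch): this MCJIT path is still
selected from client code through EngineBuilder. A minimal sketch, assuming
the 3.1-era setUseMCJIT() switch:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/MCJIT.h"        // forces MCJIT to be linked in
#include "llvm/Support/TargetSelect.h"

llvm::ExecutionEngine *makeMCJIT(llvm::Module *M) {
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();    // MCJIT emits a real object file
  return llvm::EngineBuilder(M)
      .setEngineKind(llvm::EngineKind::JIT)
      .setUseMCJIT(true)
      .create();
}
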
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index b64c21a97360..2b3df9884eb2 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -24,8 +24,7 @@ namespace llvm {
class MCJIT : public ExecutionEngine {
MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode);
TargetMachine *TM;
MCContext *Ctx;
@@ -66,8 +65,17 @@ public:
/// found, this function silently returns a null pointer. Otherwise,
/// it prints a message to stderr and aborts.
///
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ /// mapSectionAddress - map a section to its target address space value.
+ /// Map the address of a JIT section as returned from the memory manager
+ /// to the address in the target process as the running code will see it.
+ /// This is the address which will be used for relocation resolution.
+ virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) {
+ Dyld.mapSectionAddress(LocalAddress, TargetAddress);
+ }
+
/// @}
/// @name (Private) Registration Interfaces
/// @{
@@ -79,7 +87,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp
new file mode 100644
index 000000000000..457fe5e3ef06
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp
@@ -0,0 +1,14 @@
+//==-- MCJITMemoryManager.cpp - Definition for the Memory Manager -*-C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJITMemoryManager.h"
+
+using namespace llvm;
+
+void MCJITMemoryManager::anchor() { }
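
For illustration (not part of the imported patch): the lone out-of-line
anchor() definition is the usual idiom for giving a class with virtual methods
a single home translation unit, so its vtable is emitted once (here, in
MCJITMemoryManager.cpp) rather than weakly in every file that includes the
header. In general form:

// Widget.h
struct Widget {
  virtual ~Widget() {}
  virtual void anchor();    // declared, deliberately not defined inline
};

// Widget.cpp
void Widget::anchor() {}    // first out-of-line virtual member: vtable lives here
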
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
index 40bc031a0771..a68949aa41c8 100644
--- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -21,45 +21,30 @@ namespace llvm {
// and the RuntimeDyld interface that maps objects, by name, onto their
// matching LLVM IR counterparts in the module(s) being compiled.
class MCJITMemoryManager : public RTDyldMemoryManager {
+ virtual void anchor();
JITMemoryManager *JMM;
// FIXME: Multiple modules.
Module *M;
public:
- MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {}
+ MCJITMemoryManager(JITMemoryManager *jmm, Module *m) :
+ JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()), M(m) {}
+ // We own the JMM, so make sure to delete it.
+ ~MCJITMemoryManager() { delete JMM; }
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return JMM->allocateSpace(Size, Alignment);
+ }
- // Allocate ActualSize bytes, or more, for the named function. Return
- // a pointer to the allocated memory and update Size to reflect how much
- // memory was acutally allocated.
- uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-"), try
- // prepending a \01 and see if we can find it that way.
- if (!F && Name[0] == '-')
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- return JMM->startFunctionBody(F, Size);
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return JMM->allocateSpace(Size, Alignment);
}
- // Mark the end of the function, including how much of the allocated
- // memory was actually used.
- void endFunctionBody(const char *Name, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-"), try
- // prepending a \01 and see if we can find it that way.
- if (!F && Name[0] == '-')
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ return JMM->getPointerToNamedFunction(Name, AbortOnFailure);
}
};
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 9a649a52cf9e..c26e0ada5bc1 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,17 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
LIBRARYNAME = LLVMExecutionEngine
+
+include $(LEVEL)/Makefile.config
+
PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
-include $(LEVEL)/Makefile.common
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+PARALLEL_DIRS += IntelJITEvents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+PARALLEL_DIRS += OProfileJIT
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
new file mode 100644
index 000000000000..d585136eb0ac
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+include_directories( ${LLVM_OPROFILE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMOProfileJIT
+ OProfileJITEventListener.cpp
+ OProfileWrapper.cpp
+ )
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
new file mode 100644
index 000000000000..4516dfa2dab2
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = OProfileJIT
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/Makefile b/lib/ExecutionEngine/OProfileJIT/Makefile
new file mode 100644
index 000000000000..fd3adce26c1f
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/Makefile
@@ -0,0 +1,18 @@
+##===- lib/ExecutionEngine/OProfileJIT/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMOProfileJIT
+
+include $(LEVEL)/Makefile.config
+
+SOURCES += OProfileJITEventListener.cpp \
+ OProfileWrapper.cpp
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 9a9ed6d33484..e6142e3678da 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -7,51 +7,55 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a JITEventListener object that calls into OProfile to tell
-// it about JITted functions. For now, we only record function names and sizes,
-// but eventually we'll also record line number information.
-//
-// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
-// definition of the interface we're using.
+// This file defines a JITEventListener object that uses OProfileWrapper to tell
+// oprofile about JITted functions, including source line information.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
#define DEBUG_TYPE "oprofile-jit-event-listener"
#include "llvm/Function.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h"
-#include <stddef.h>
-using namespace llvm;
+#include "EventListenerCommon.h"
-#if USE_OPROFILE
+#include <dirent.h>
+#include <fcntl.h>
-#include <opagent.h>
+using namespace llvm;
+using namespace llvm::jitprofiling;
namespace {
class OProfileJITEventListener : public JITEventListener {
- op_agent_t Agent;
+ OProfileWrapper& Wrapper;
+
+ void initialize();
+
public:
- OProfileJITEventListener();
+ OProfileJITEventListener(OProfileWrapper& LibraryWrapper)
+ : Wrapper(LibraryWrapper) {
+ initialize();
+ }
+
~OProfileJITEventListener();
virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details);
+ void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details);
+
virtual void NotifyFreeingMachineCode(void *OldPtr);
};
-OProfileJITEventListener::OProfileJITEventListener()
- : Agent(op_open_agent()) {
- if (Agent == NULL) {
+void OProfileJITEventListener::initialize() {
+ if (!Wrapper.op_open_agent()) {
const std::string err_str = sys::StrError();
DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
} else {
@@ -60,8 +64,8 @@ OProfileJITEventListener::OProfileJITEventListener()
}
OProfileJITEventListener::~OProfileJITEventListener() {
- if (Agent != NULL) {
- if (op_close_agent(Agent) == -1) {
+ if (Wrapper.isAgentAvailable()) {
+ if (Wrapper.op_close_agent() == -1) {
const std::string err_str = sys::StrError();
DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
<< err_str << "\n");
@@ -71,22 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
}
-class FilenameCache {
- // Holds the filename of each Scope, so that we can pass a null-terminated
- // string into oprofile. Use an AssertingVH rather than a ValueMap because we
- // shouldn't be modifying any MDNodes while this map is alive.
- DenseMap<AssertingVH<MDNode>, std::string> Filenames;
-
- public:
- const char *getFilename(MDNode *Scope) {
- std::string &Filename = Filenames[Scope];
- if (Filename.empty()) {
- Filename = DIScope(Scope).getFilename();
- }
- return Filename.c_str();
- }
-};
-
static debug_line_info LineStartToOProfileFormat(
const MachineFunction &MF, FilenameCache &Filenames,
uintptr_t Address, DebugLoc Loc) {
@@ -103,9 +91,9 @@ static debug_line_info LineStartToOProfileFormat(
// Adds the just-emitted function to the symbol table.
void OProfileJITEventListener::NotifyFunctionEmitted(
const Function &F, void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details) {
+ const JITEvent_EmittedFunctionDetails &Details) {
assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
- if (op_write_native_code(Agent, F.getName().data(),
+ if (Wrapper.op_write_native_code(F.getName().data(),
reinterpret_cast<uint64_t>(FnStart),
FnStart, FnSize) == -1) {
DEBUG(dbgs() << "Failed to tell OProfile about native function "
@@ -151,8 +139,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
// line info's address to include the start of the function.
LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
- if (op_write_debug_line_info(Agent, FnStart,
- LineInfo.size(), &*LineInfo.begin()) == -1) {
+ if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
+ &*LineInfo.begin()) == -1) {
DEBUG(dbgs()
<< "Failed to tell OProfile about line numbers for native function "
<< F.getName() << " at ["
@@ -164,7 +152,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
// Removes the being-deleted function from the symbol table.
void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
assert(FnStart && "Invalid function pointer");
- if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
DEBUG(dbgs()
<< "Failed to tell OProfile about unload of native function at "
<< FnStart << "\n");
@@ -174,19 +162,16 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
} // anonymous namespace.
namespace llvm {
-JITEventListener *createOProfileJITEventListener() {
- return new OProfileJITEventListener;
-}
+JITEventListener *JITEventListener::createOProfileJITEventListener() {
+ static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper);
+ return new OProfileJITEventListener(*JITProfilingWrapper);
}
-#else // USE_OPROFILE
-
-namespace llvm {
-// By defining this to return NULL, we can let clients call it unconditionally,
-// even if they haven't configured with the OProfile libraries.
-JITEventListener *createOProfileJITEventListener() {
- return NULL;
+// for testing
+JITEventListener *JITEventListener::createOProfileJITEventListener(
+ OProfileWrapper* TestImpl) {
+ return new OProfileJITEventListener(*TestImpl);
}
-} // namespace llvm
-#endif // USE_OPROFILE
+} // namespace llvm
+
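In practice a client attaches this listener to an ExecutionEngine. A minimal usage sketch, assuming LLVM was configured with OProfile support and that EE is an existing engine (attachOProfile is a hypothetical helper name):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JITEventListener.h"

    // Hand JIT events to oprofile. The listener degrades to a no-op when the
    // oprofiled daemon is not running.
    void attachOProfile(llvm::ExecutionEngine *EE) {
      llvm::JITEventListener *Listener =
          llvm::JITEventListener::createOProfileJITEventListener();
      EE->RegisterJITEventListener(Listener);
    }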
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
new file mode 100644
index 000000000000..d67f5370b862
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -0,0 +1,263 @@
+//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface in OProfileWrapper.h. It is responsible
+// for loading the opagent dynamic library when the first call to an op_
+// function occurs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+
+#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/ADT/SmallString.h"
+
+#include <sstream>
+#include <cstring>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+namespace {
+
+// Global mutex to ensure a single thread initializes oprofile agent.
+llvm::sys::Mutex OProfileInitializationMutex;
+
+} // anonymous namespace
+
+namespace llvm {
+
+OProfileWrapper::OProfileWrapper()
+: Agent(0),
+ OpenAgentFunc(0),
+ CloseAgentFunc(0),
+ WriteNativeCodeFunc(0),
+ WriteDebugLineInfoFunc(0),
+ UnloadNativeCodeFunc(0),
+ MajorVersionFunc(0),
+ MinorVersionFunc(0),
+ IsOProfileRunningFunc(0),
+ Initialized(false) {
+}
+
+bool OProfileWrapper::initialize() {
+ using namespace llvm;
+ using namespace llvm::sys;
+
+ MutexGuard Guard(OProfileInitializationMutex);
+
+ if (Initialized)
+ return OpenAgentFunc != 0;
+
+ Initialized = true;
+
+ // If the oprofile daemon is not running, don't load the opagent library
+ if (!isOProfileRunning()) {
+ DEBUG(dbgs() << "OProfile daemon is not detected.\n");
+ return false;
+ }
+
+ std::string error;
+  if (DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) {
+ DEBUG(dbgs()
+ << "OProfile connector library libopagent.so could not be loaded: "
+ << error << "\n");
+ }
+
+ // Get the addresses of the opagent functions
+ OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_open_agent");
+ CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_close_agent");
+ WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code");
+ WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info");
+ UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code");
+ MajorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
+  MinorVersionFunc = (op_minor_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_minor_version");
+
+ // With missing functions, we can do nothing
+ if (!OpenAgentFunc
+ || !CloseAgentFunc
+ || !WriteNativeCodeFunc
+ || !WriteDebugLineInfoFunc
+ || !UnloadNativeCodeFunc) {
+ OpenAgentFunc = 0;
+ CloseAgentFunc = 0;
+ WriteNativeCodeFunc = 0;
+ WriteDebugLineInfoFunc = 0;
+ UnloadNativeCodeFunc = 0;
+ return false;
+ }
+
+ return true;
+}
+
+bool OProfileWrapper::isOProfileRunning() {
+ if (IsOProfileRunningFunc != 0)
+ return IsOProfileRunningFunc();
+ return checkForOProfileProcEntry();
+}
+
+bool OProfileWrapper::checkForOProfileProcEntry() {
+ DIR* ProcDir;
+
+ ProcDir = opendir("/proc");
+ if (!ProcDir)
+ return false;
+
+ // Walk the /proc tree looking for the oprofile daemon
+ struct dirent* Entry;
+ while (0 != (Entry = readdir(ProcDir))) {
+ if (Entry->d_type == DT_DIR) {
+ // Build a path from the current entry name
+ SmallString<256> CmdLineFName;
+ raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name
+ << "/cmdline";
+
+ // Open the cmdline file
+      int CmdLineFD = open(CmdLineFName.c_str(), O_RDONLY);
+ if (CmdLineFD != -1) {
+ char ExeName[PATH_MAX+1];
+ char* BaseName = 0;
+
+ // Read the cmdline file
+ ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1);
+ close(CmdLineFD);
+ ssize_t Idx = 0;
+
+ // Find the terminator for the first string
+ while (Idx < NumRead-1 && ExeName[Idx] != 0) {
+ Idx++;
+ }
+
+ // Go back to the last non-null character
+ Idx--;
+
+ // Find the last path separator in the first string
+ while (Idx > 0) {
+ if (ExeName[Idx] == '/') {
+ BaseName = ExeName + Idx + 1;
+ break;
+ }
+ Idx--;
+ }
+
+ // Test this to see if it is the oprofile daemon
+ if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+ // If it is, we're done
+ closedir(ProcDir);
+ return true;
+ }
+ }
+ }
+ }
+
+ // We've looked through all the files and didn't find the daemon
+ closedir(ProcDir);
+ return false;
+}
+
+bool OProfileWrapper::op_open_agent() {
+ if (!Initialized)
+ initialize();
+
+ if (OpenAgentFunc != 0) {
+ Agent = OpenAgentFunc();
+ return Agent != 0;
+ }
+
+ return false;
+}
+
+int OProfileWrapper::op_close_agent() {
+ if (!Initialized)
+ initialize();
+
+ int ret = -1;
+ if (Agent && CloseAgentFunc) {
+ ret = CloseAgentFunc(Agent);
+ if (ret == 0) {
+ Agent = 0;
+ }
+ }
+ return ret;
+}
+
+bool OProfileWrapper::isAgentAvailable() {
+ return Agent != 0;
+}
+
+int OProfileWrapper::op_write_native_code(const char* Name,
+ uint64_t Addr,
+ void const* Code,
+ const unsigned int Size) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteNativeCodeFunc)
+ return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size);
+
+ return -1;
+}
+
+int OProfileWrapper::op_write_debug_line_info(
+ void const* Code,
+ size_t NumEntries,
+ struct debug_line_info const* Info) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteDebugLineInfoFunc)
+ return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info);
+
+ return -1;
+}
+
+int OProfileWrapper::op_major_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MajorVersionFunc)
+ return MajorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_minor_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MinorVersionFunc)
+ return MinorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_unload_native_code(uint64_t Addr) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && UnloadNativeCodeFunc)
+ return UnloadNativeCodeFunc(Agent, Addr);
+
+ return -1;
+}
+
+} // namespace llvm
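The wrapper above avoids a hard link-time dependency on libopagent by binding its entry points lazily through llvm::sys::DynamicLibrary. A stripped-down sketch of that pattern, using a single entry point and the same API (hypothetical standalone function, not the wrapper's actual members):

    #include "llvm/Support/DynamicLibrary.h"
    #include <stdint.h>
    #include <string>

    typedef int (*op_major_version_ptr_t)(void);

    // Resolve one libopagent function on demand; return -1 if the library or
    // the symbol is unavailable, mirroring OProfileWrapper's fallback.
    int getOpagentMajorVersion() {
      std::string Err;
      // LoadLibraryPermanently returns true on failure.
      if (llvm::sys::DynamicLibrary::LoadLibraryPermanently("libopagent.so", &Err))
        return -1;
      void *Sym =
          llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
      if (!Sym)
        return -1;
      return ((op_major_version_ptr_t)(intptr_t)Sym)();
    }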
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index c236d1d9d115..002e63cd3b6b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -1,9 +1,5 @@
add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
RuntimeDyldMachO.cpp
- )
-
-add_llvm_library_dependencies(LLVMRuntimeDyld
- LLVMObject
- LLVMSupport
+ RuntimeDyldELF.cpp
)
diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
new file mode 100644
index 000000000000..97dc86129a33
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = RuntimeDyld
+parent = ExecutionEngine
+required_libraries = Object Support
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 33dd70502798..63cec1aca3b1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,10 @@
#define DEBUG_TYPE "dyld"
#include "RuntimeDyldImpl.h"
+#include "RuntimeDyldELF.h"
+#include "RuntimeDyldMachO.h"
+#include "llvm/Support/Path.h"
+
using namespace llvm;
using namespace llvm::object;
@@ -22,35 +26,383 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
-void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress) {
- // Allocate memory for the function via the memory manager.
- uintptr_t Size = EndAddress - StartAddress + 1;
- uintptr_t AllocSize = Size;
- uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
- assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
- "Memory manager failed to allocate enough memory!");
- // Copy the function payload into the memory block.
- memcpy(Mem, StartAddress, Size);
- MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
- // Remember where we put it.
- Functions[Name] = sys::MemoryBlock(Mem, Size);
- // Default the assigned address for this symbol to wherever this
- // allocated it.
- SymbolTable[Name] = Mem;
- DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
-}
+namespace {
+ // Helper for extensive error checking in debug builds.
+ error_code Check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+ }
+} // end anonymous namespace
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
- // Just iterate over the symbols in our symbol table and assign their
- // addresses.
- StringMap<uint8_t*>::iterator i = SymbolTable.begin();
- StringMap<uint8_t*>::iterator e = SymbolTable.end();
- for (;i != e; ++i)
- reassignSymbolAddress(i->getKey(), i->getValue());
+ // First, resolve relocations associated with external symbols.
+ resolveSymbols();
+
+ // Just iterate over the sections we have and resolve all the relocations
+ // in them. Gross overkill, but it gets the job done.
+ for (int i = 0, e = Sections.size(); i != e; ++i) {
+ reassignSectionAddress(i, Sections[i].LoadAddress);
+ }
}
+void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
+ uint64_t TargetAddress) {
+ for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Address == LocalAddress) {
+ reassignSectionAddress(i, TargetAddress);
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to remap address of unknown section!");
+}
+
+bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
+  // FIXME: ObjectFile doesn't modify the MemoryBuffer.
+  // It should take a const MemoryBuffer as a parameter.
+ OwningPtr<ObjectFile> obj(ObjectFile::createObjectFile(
+ const_cast<MemoryBuffer*>(InputBuffer)));
+ if (!obj)
+ report_fatal_error("Unable to create object image from memory buffer!");
+
+ Arch = (Triple::ArchType)obj->getArch();
+
+ LocalSymbolMap LocalSymbols; // Functions and data symbols from the
+ // object file.
+ ObjSectionToIDMap LocalSections; // Used sections from the object file
+ CommonSymbolMap CommonSymbols; // Common symbols requiring allocation
+ uint64_t CommonSize = 0;
+
+ error_code err;
+ // Parse symbols
+ DEBUG(dbgs() << "Parse symbols:\n");
+ for (symbol_iterator i = obj->begin_symbols(), e = obj->end_symbols();
+ i != e; i.increment(err)) {
+ Check(err);
+ object::SymbolRef::Type SymType;
+ StringRef Name;
+ Check(i->getType(SymType));
+ Check(i->getName(Name));
+
+ uint32_t flags;
+ Check(i->getFlags(flags));
+
+ bool isCommon = flags & SymbolRef::SF_Common;
+ if (isCommon) {
+ // Add the common symbols to a list. We'll allocate them all below.
+ uint64_t Size = 0;
+ Check(i->getSize(Size));
+ CommonSize += Size;
+ CommonSymbols[*i] = Size;
+ } else {
+ if (SymType == object::SymbolRef::ST_Function ||
+ SymType == object::SymbolRef::ST_Data) {
+ uint64_t FileOffset;
+ StringRef sData;
+ section_iterator si = obj->end_sections();
+ Check(i->getFileOffset(FileOffset));
+ Check(i->getSection(si));
+ if (si == obj->end_sections()) continue;
+ Check(si->getContents(sData));
+ const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+ (uintptr_t)FileOffset;
+ uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
+ unsigned SectionID =
+ findOrEmitSection(*si,
+ SymType == object::SymbolRef::ST_Function,
+ LocalSections);
+ bool isGlobal = flags & SymbolRef::SF_Global;
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+ DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+ << " flags: " << flags
+ << " SID: " << SectionID
+ << " Offset: " << format("%p", SectOffset));
+ if (isGlobal)
+ SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ }
+ }
+ DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
+ }
+
+ // Allocate common symbols
+ if (CommonSize != 0)
+ emitCommonSymbols(CommonSymbols, CommonSize, LocalSymbols);
+
+  // Parse and process relocations
+ DEBUG(dbgs() << "Parse relocations:\n");
+ for (section_iterator si = obj->begin_sections(),
+ se = obj->end_sections(); si != se; si.increment(err)) {
+ Check(err);
+ bool isFirstRelocation = true;
+ unsigned SectionID = 0;
+ StubMap Stubs;
+
+ for (relocation_iterator i = si->begin_relocations(),
+ e = si->end_relocations(); i != e; i.increment(err)) {
+ Check(err);
+
+      // If it's the first relocation in this section, find its SectionID
+ if (isFirstRelocation) {
+ SectionID = findOrEmitSection(*si, true, LocalSections);
+ DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
+ isFirstRelocation = false;
+ }
+
+ ObjRelocationInfo RI;
+ RI.SectionID = SectionID;
+ Check(i->getAdditionalInfo(RI.AdditionalInfo));
+ Check(i->getOffset(RI.Offset));
+ Check(i->getSymbol(RI.Symbol));
+ Check(i->getType(RI.Type));
+
+ DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
+ << " Offset: " << format("%p", (uintptr_t)RI.Offset)
+ << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
+ << "\n");
+ processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+ }
+ }
+ return false;
+}
+
+unsigned RuntimeDyldImpl::emitCommonSymbols(const CommonSymbolMap &Map,
+ uint64_t TotalSize,
+ LocalSymbolMap &LocalSymbols) {
+ // Allocate memory for the section
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
+ SectionID);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for common symbols!");
+ uint64_t Offset = 0;
+ Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0));
+ memset(Addr, 0, TotalSize);
+
+ DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << TotalSize
+ << "\n");
+
+ // Assign the address of each symbol
+ for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end();
+ it != itEnd; it++) {
+ uint64_t Size = it->second;
+ StringRef Name;
+ it->first.getName(Name);
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset);
+ Offset += Size;
+ Addr += Size;
+ }
+
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section,
+ bool IsCode) {
+
+ unsigned StubBufSize = 0,
+ StubSize = getMaxStubSize();
+ error_code err;
+ if (StubSize > 0) {
+ for (relocation_iterator i = Section.begin_relocations(),
+ e = Section.end_relocations(); i != e; i.increment(err), Check(err))
+ StubBufSize += StubSize;
+ }
+ StringRef data;
+ uint64_t Alignment64;
+ Check(Section.getContents(data));
+ Check(Section.getAlignment(Alignment64));
+
+ unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
+ bool IsRequired;
+ bool IsVirtual;
+ bool IsZeroInit;
+ uint64_t DataSize;
+ Check(Section.isRequiredForExecution(IsRequired));
+ Check(Section.isVirtual(IsVirtual));
+ Check(Section.isZeroInit(IsZeroInit));
+ Check(Section.getSize(DataSize));
+
+ unsigned Allocate;
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr;
+ const char *pData = 0;
+
+ // Some sections, such as debug info, don't need to be loaded for execution.
+ // Leave those where they are.
+ if (IsRequired) {
+ Allocate = DataSize + StubBufSize;
+ Addr = IsCode
+ ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+ if (!Addr)
+ report_fatal_error("Unable to allocate section memory!");
+
+ // Virtual sections have no data in the object image, so leave pData = 0
+ if (!IsVirtual)
+ pData = data.data();
+
+ // Zero-initialize or copy the data from the image
+ if (IsZeroInit || IsVirtual)
+ memset(Addr, 0, DataSize);
+ else
+ memcpy(Addr, pData, DataSize);
+
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " obj addr: " << format("%p", pData)
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ }
+ else {
+ // Even if we didn't load the section, we need to record an entry for it
+ // to handle later processing (and by 'handle' I mean don't do anything
+ // with these sections).
+ Allocate = 0;
+ Addr = 0;
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " obj addr: " << format("%p", data.data())
+ << " new addr: 0"
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ }
+
+ Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData));
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::findOrEmitSection(const SectionRef &Section,
+ bool IsCode,
+ ObjSectionToIDMap &LocalSections) {
+
+ unsigned SectionID = 0;
+ ObjSectionToIDMap::iterator i = LocalSections.find(Section);
+ if (i != LocalSections.end())
+ SectionID = i->second;
+ else {
+ SectionID = emitSection(Section, IsCode);
+ LocalSections[Section] = SectionID;
+ }
+ return SectionID;
+}
+
+void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value,
+ unsigned SectionID, uintptr_t Offset,
+ uint32_t RelType) {
+ DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName)
+ << " SID: " << Value.SectionID
+ << " Addend: " << format("%p", Value.Addend)
+ << " Offset: " << format("%p", Offset)
+ << " RelType: " << format("%x", RelType)
+ << "\n");
+
+ if (Value.SymbolName == 0) {
+ Relocations[Value.SectionID].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+ } else
+ SymbolRelocations[Value.SymbolName].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+}
+
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
+  // TODO: There is only an ARM far stub for now. We should add a Thumb stub,
+  // and stubs for Thumb-to-ARM and ARM-to-Thumb branches.
+ if (Arch == Triple::arm) {
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ *StubAddr = 0xe51ff004; // ldr pc,<label>
+ return (uint8_t*)++StubAddr;
+ }
+ else
+ return Addr;
+}
+
+// Assign an address to a section and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ // The address to use for relocation resolution is not
+ // the address of the local section buffer. We must be doing
+ // a remote execution environment of some sort. Re-apply any
+ // relocations referencing this section with the given address.
+ //
+ // Addr is a uint64_t because we can't assume the pointer width
+ // of the target is the same as that of the host. Just use a generic
+ // "big enough" type.
+ Sections[SectionID].LoadAddress = Addr;
+ DEBUG(dbgs() << "Resolving relocations Section #" << SectionID
+ << "\t" << format("%p", (uint8_t *)Addr)
+ << "\n");
+ resolveRelocationList(Relocations[SectionID], Addr);
+}
+
+void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
+ uint64_t Value) {
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address != 0) {
+ uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " Data: " << RE.Data
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+ Value, RE.Data, RE.Addend);
+ }
+}
+
+void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
+ uint64_t Value) {
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ resolveRelocationEntry(Relocs[i], Value);
+ }
+}
+
+// resolveSymbols - Resolve any relocations to symbols whose addresses we
+// now know, either from the symbol table or from the memory manager.
+void RuntimeDyldImpl::resolveSymbols() {
+ StringMap<RelocationList>::iterator i = SymbolRelocations.begin(),
+ e = SymbolRelocations.end();
+ for (; i != e; i++) {
+ StringRef Name = i->first();
+ RelocationList &Relocs = i->second;
+ StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
+ if (Loc == SymbolTable.end()) {
+      // This is an external symbol; try to get its address from the
+      // MemoryManager.
+ uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+ true);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("%p", Addr)
+ << "\n");
+ resolveRelocationList(Relocs, (uintptr_t)Addr);
+ } else {
+ // Change the relocation to be section relative rather than symbol
+ // relative and move it to the resolved relocation list.
+ DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
+ for (int i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry Entry = Relocs[i];
+ Entry.Addend += Loc->second.second;
+ Relocations[Loc->second.first].push_back(Entry);
+ }
+ Relocs.clear();
+ }
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
@@ -64,12 +416,36 @@ RuntimeDyld::~RuntimeDyld() {
bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
if (!Dyld) {
- if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
- Dyld = new RuntimeDyldMachO(MM);
- else
- report_fatal_error("Unknown object format!");
+ sys::LLVMFileType type = sys::IdentifyFileType(
+ InputBuffer->getBufferStart(),
+ static_cast<unsigned>(InputBuffer->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ Dyld = new RuntimeDyldELF(MM);
+ break;
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::Mach_O_DSYMCompanion_FileType:
+ Dyld = new RuntimeDyldMachO(MM);
+ break;
+ case sys::Unknown_FileType:
+ case sys::Bitcode_FileType:
+ case sys::Archive_FileType:
+ case sys::COFF_FileType:
+ report_fatal_error("Incompatible object format!");
+ }
} else {
- if(!Dyld->isCompatibleFormat(InputBuffer))
+ if (!Dyld->isCompatibleFormat(InputBuffer))
report_fatal_error("Incompatible object format!");
}
@@ -84,8 +460,14 @@ void RuntimeDyld::resolveRelocations() {
Dyld->resolveRelocations();
}
-void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- Dyld->reassignSymbolAddress(Name, Addr);
+void RuntimeDyld::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ Dyld->reassignSectionAddress(SectionID, Addr);
+}
+
+void RuntimeDyld::mapSectionAddress(void *LocalAddress,
+ uint64_t TargetAddress) {
+ Dyld->mapSectionAddress(LocalAddress, TargetAddress);
}
StringRef RuntimeDyld::getErrorString() {
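Taken together, the reworked RuntimeDyld interface is section-based: a client loads an object image, optionally remaps sections to their target addresses, and then resolves relocations. A rough usage sketch under those assumptions (MM and Buffer are caller-provided; error handling kept minimal):

    #include "llvm/ExecutionEngine/RuntimeDyld.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/ErrorHandling.h"

    void linkObject(llvm::RTDyldMemoryManager *MM, llvm::MemoryBuffer *Buffer) {
      llvm::RuntimeDyld Dyld(MM);
      // loadObject picks RuntimeDyldELF or RuntimeDyldMachO from the file type
      // and returns true on error in this snapshot.
      if (Dyld.loadObject(Buffer))
        llvm::report_fatal_error(Dyld.getErrorString());
      // For remote targets, map each section to where it will really live:
      //   Dyld.mapSectionAddress(LocalAddr, TargetAddr);
      Dyld.resolveRelocations();                    // Apply all pending fixups.
      void *Entry = Dyld.getSymbolAddress("main");  // Hypothetical entry symbol.
      (void)Entry;
    }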
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
new file mode 100644
index 000000000000..57fefee5dedc
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -0,0 +1,262 @@
+//===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of ELF support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "RuntimeDyldELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+
+void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_X86_64_64: {
+ uint64_t *Target = (uint64_t*)(LocalAddress);
+ *Target = Value + Addend;
+ break;
+ }
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S: {
+ Value += Addend;
+ // FIXME: Handle the possibility of this assertion failing
+ assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
+ (Type == ELF::R_X86_64_32S &&
+ (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL));
+ uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress);
+ *Target = TruncatedAddr;
+ break;
+ }
+ case ELF::R_X86_64_PC32: {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+    assert(RealOffset <= 2147483647LL && RealOffset >= -2147483648LL);
+ int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+ *Placeholder = TruncOffset;
+ break;
+ }
+ }
+}
+
+void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ switch (Type) {
+ case ELF::R_386_32: {
+ uint32_t *Target = (uint32_t*)(LocalAddress);
+ uint32_t Placeholder = *Target;
+ *Target = Placeholder + Value + Addend;
+ break;
+ }
+ case ELF::R_386_PC32: {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ *Placeholder = RealOffset;
+ break;
+ }
+ default:
+ // There are other relocation types, but it appears these are the
+ // only ones currently used by the LLVM ELF object writer
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ // TODO: Add Thumb relocations.
+ uint32_t* TargetPtr = (uint32_t*)LocalAddress;
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress
+ << " FinalAddress: " << format("%p",FinalAddress)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+ switch(Type) {
+ default:
+    llvm_unreachable("Relocation type not implemented yet!");
+
+ // Just write 32bit value to relocation address
+ case ELF::R_ARM_ABS32 :
+ *TargetPtr = Value;
+ break;
+
+  // Write the low 16 bits of the value into the MOVW instruction: bits
+  // [11:0] go into the imm12 field, bits [15:12] into the imm4 field.
+ case ELF::R_ARM_MOVW_ABS_NC :
+ Value = Value & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+  // Write the high 16 bits of the value into the MOVT instruction, split
+  // into the imm12 and imm4 fields in the same way.
+ case ELF::R_ARM_MOVT_ABS :
+ Value = (Value >> 16) & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+  // Write a 24-bit PC-relative value into the branch instruction.
+ case ELF::R_ARM_PC24 : // Fall through.
+ case ELF::R_ARM_CALL : // Fall through.
+ case ELF::R_ARM_JUMP24 :
+ int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
+ RelValue = (RelValue & 0x03FFFFFC) >> 2;
+ *TargetPtr &= 0xFF000000;
+ *TargetPtr |= RelValue;
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Arch) {
+ case Triple::x86_64:
+ resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend);
+ break;
+ case Triple::x86:
+ resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ default: llvm_unreachable("Unsupported CPU type!");
+ }
+}
+
+void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
+ intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
+ RelocationValueRef Value;
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+ DEBUG(dbgs() << "\t\tRelType: " << RelType
+ << " Addend: " << Addend
+ << " TargetName: " << TargetName
+ << "\n");
+  // First, look up the symbol in the object file's local symbols.
+ LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data());
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+    // Next, look it up in the global symbol table.
+ StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data());
+ if (gsi != SymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else {
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
+ switch (SymType) {
+ case SymbolRef::ST_Debug: {
+        // TODO: ELF SymbolRef::ST_Debug currently maps to STT_SECTION, which
+        // is not obvious and could be changed by other developers. The best
+        // approach may be to add a new symbol type ST_Section to SymbolRef
+        // and use it here.
+ section_iterator si = Obj.end_sections();
+ Symbol.getSection(si);
+ if (si == Obj.end_sections())
+ llvm_unreachable("Symbol section not found, bad object file format!");
+        DEBUG(dbgs() << "\t\tThis is a section symbol\n");
+ Value.SectionID = findOrEmitSection((*si), true, ObjSectionToID);
+ Value.Addend = Addend;
+ break;
+ }
+ case SymbolRef::ST_Unknown: {
+ Value.SymbolName = TargetName.data();
+ Value.Addend = Addend;
+ break;
+ }
+ default:
+ llvm_unreachable("Unresolved symbol type!");
+ break;
+ }
+ }
+ }
+ DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
+ << " Rel.Offset: " << Rel.Offset
+ << "\n");
+ if (Arch == Triple::arm &&
+ (RelType == ELF::R_ARM_PC24 ||
+ RelType == ELF::R_ARM_CALL ||
+ RelType == ELF::R_ARM_JUMP24)) {
+ // This is an ARM branch relocation, need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+    // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID,
+ StubTargetAddr - Section.Address, ELF::R_ARM_ABS32);
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ Section.StubOffset, RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
+}
+
+bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+ StringRef Magic = InputBuffer->getBuffer().slice(0, ELF::EI_NIDENT);
+ return (memcmp(Magic.data(), ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+}
+} // namespace llvm
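As a worked example of the PC-relative case handled above: R_X86_64_PC32 stores the placeholder contents plus S + A - P as a signed 32-bit displacement. A self-contained sketch of that computation, with no RuntimeDyld types and hypothetical addresses:

    #include <stdint.h>
    #include <assert.h>

    // Apply an R_X86_64_PC32-style fixup at LocalAddress. FinalAddress is the
    // place P in the target image, Value is the symbol address S.
    void applyPC32(uint8_t *LocalAddress, uint64_t FinalAddress,
                   uint64_t Value, int64_t Addend) {
      uint32_t *Placeholder = (uint32_t *)LocalAddress;
      int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
      // The result must fit in a signed 32-bit displacement.
      assert(RealOffset <= 2147483647LL && RealOffset >= -2147483648LL);
      *Placeholder = (uint32_t)(RealOffset & 0xFFFFFFFF);
    }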
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
new file mode 100644
index 000000000000..36566da57a58
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -0,0 +1,62 @@
+//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_ELF_H
+#define LLVM_RUNTIME_DYLD_ELF_H
+
+#include "RuntimeDyldImpl.h"
+
+using namespace llvm;
+
+
+namespace llvm {
+class RuntimeDyldELF : public RuntimeDyldImpl {
+protected:
+ void resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ void resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
+
+public:
+ RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
+};
+
+} // end namespace llvm
+
+#endif
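The header above shows the pattern a backend follows: override RuntimeDyldImpl's format-specific hooks and leave the generic loading logic in the base class. A hypothetical skeleton for some other format (names invented, not part of the tree) might look like:

    #include "RuntimeDyldImpl.h"

    class RuntimeDyldFoo : public RuntimeDyldImpl {
    protected:
      // Patch the bytes at LocalAddress according to the format's rules.
      virtual void resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress,
                                     uint64_t Value, uint32_t Type, int64_t Addend) {
      }
      // Classify each relocation from the object and queue it via AddRelocation().
      virtual void processRelocationRef(const ObjRelocationInfo &Rel,
                                        const ObjectFile &Obj,
                                        ObjSectionToIDMap &ObjSectionToID,
                                        LocalSymbolMap &Symbols, StubMap &Stubs) {
      }
    public:
      RuntimeDyldFoo(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
      bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
        return false; // Check the format's magic bytes here.
      }
    };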
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 7190a3c36fe9..bf678af6ece7 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -1,4 +1,4 @@
-//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,42 +15,128 @@
#define LLVM_RUNTIME_DYLD_IMPL_H
#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Triple.h"
+#include <map>
+#include "llvm/Support/Format.h"
using namespace llvm;
using namespace llvm::object;
namespace llvm {
+
+class SectionEntry {
+public:
+ uint8_t* Address;
+ size_t Size;
+ uint64_t LoadAddress; // For each section, the address it will be
+ // considered to live at for relocations. The same
+ // as the pointer to the above memory block for
+ // hosted JITs.
+  uintptr_t StubOffset; // Used on architectures that need stub
+                        // functions for far relocations, such as ARM.
+  uintptr_t ObjAddress; // Section address in the object file. Used to
+                        // calculate the MachO relocation addend.
+ SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset,
+ uintptr_t objAddress)
+ : Address(address), Size(size), LoadAddress((uintptr_t)address),
+ StubOffset(stubOffset), ObjAddress(objAddress) {}
+};
+
+class RelocationEntry {
+public:
+ unsigned SectionID; // Section the relocation is contained in.
+ uintptr_t Offset; // Offset into the section for the relocation.
+  uint32_t Data; // Relocation data, including the relocation type and
+                 // any other flags and parameters from the object file.
+ intptr_t Addend; // Addend encoded in the instruction itself, if any,
+ // plus the offset into the source section for
+ // the symbol once the relocation is resolvable.
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
+ : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
+};
+
+// Raw relocation data from object file
+class ObjRelocationInfo {
+public:
+ unsigned SectionID;
+ uint64_t Offset;
+ SymbolRef Symbol;
+ uint64_t Type;
+ int64_t AdditionalInfo;
+};
+
+class RelocationValueRef {
+public:
+ unsigned SectionID;
+ intptr_t Addend;
+ const char *SymbolName;
+ RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+
+ inline bool operator==(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+ }
+ inline bool operator <(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+ }
+};
+
class RuntimeDyldImpl {
protected:
- unsigned CPUType;
- unsigned CPUSubtype;
-
// The MemoryManager to load objects into.
RTDyldMemoryManager *MemMgr;
- // FIXME: This all assumes we're dealing with external symbols for anything
- // explicitly referenced. I.e., we can index by name and things
- // will work out. In practice, this may not be the case, so we
- // should find a way to effectively generalize.
+  // A list of emitted sections.
+ typedef SmallVector<SectionEntry, 64> SectionList;
+ SectionList Sections;
- // For each function, we have a MemoryBlock of it's instruction data.
- StringMap<sys::MemoryBlock> Functions;
+ // Keep a map of sections from object file to the SectionID which
+ // references it.
+ typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
// Master symbol table. As modules are loaded and external symbols are
- // resolved, their addresses are stored here.
- StringMap<uint8_t*> SymbolTable;
+ // resolved, their addresses are stored here as a SectionID/Offset pair.
+ typedef std::pair<unsigned, uintptr_t> SymbolLoc;
+ StringMap<SymbolLoc> SymbolTable;
+ typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
+
+ // Keep a map of common symbols to their sizes
+ typedef std::map<SymbolRef, unsigned> CommonSymbolMap;
+
+  // For each symbol, keep a list of relocations based on it. Any time
+  // its address is reassigned (e.g. because the JIT recompiled the
+  // function), the relocations get re-resolved.
+  // The symbol (or section) the relocation is sourced from is the Key
+  // in the relocation list where it's stored.
+ typedef SmallVector<RelocationEntry, 64> RelocationList;
+  // Relocations to sections already loaded. Indexed by SectionID, which is
+  // the source of the address. The target where the address will be written
+  // is SectionID/Offset in the relocation itself.
+ DenseMap<unsigned, RelocationList> Relocations;
+ // Relocations to external symbols that are not yet resolved.
+ // Indexed by symbol name.
+ StringMap<RelocationList> SymbolRelocations;
+
+ typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+ Triple::ArchType Arch;
+
+ inline unsigned getMaxStubSize() {
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else
+ return 0;
+ }
bool HasError;
std::string ErrorStr;
@@ -62,25 +148,84 @@ protected:
return true;
}
- void extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress);
+ uint8_t *getSectionAddress(unsigned SectionID) {
+ return (uint8_t*)Sections[SectionID].Address;
+ }
+ /// \brief Emits a section containing common symbols.
+ /// \return SectionID.
+ unsigned emitCommonSymbols(const CommonSymbolMap &Map,
+ uint64_t TotalSize,
+ LocalSymbolMap &Symbols);
+
+ /// \brief Emits section data from the object file to the MemoryManager.
+  /// \param IsCode if true, allocateCodeSection() will be used for the
+  ///        emission; otherwise allocateDataSection() will be used.
+ /// \return SectionID.
+ unsigned emitSection(const SectionRef &Section, bool IsCode);
+
+  /// \brief Find the section in LocalSections. If it is not found, emit it
+  ///        and store it in LocalSections.
+  /// \param IsCode if true, allocateCodeSection() will be used for the
+  ///        emission; otherwise allocateDataSection() will be used.
+ /// \return SectionID.
+ unsigned findOrEmitSection(const SectionRef &Section, bool IsCode,
+ ObjSectionToIDMap &LocalSections);
+
+  /// \brief If Value.SymbolName is NULL, store the relocation in Relocations;
+  ///        otherwise store it in SymbolRelocations.
+ void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
+ uintptr_t Offset, uint32_t RelType);
+
+  /// \brief Emits a long-jump stub sequence at Addr.
+  /// \return Pointer to the memory area where the target address should be
+  ///         written.
+ uint8_t* createStubFunction(uint8_t *Addr);
+
+ /// \brief Resolves relocations from Relocs list with address from Value.
+ void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+ void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+  /// \brief An object-file-specific relocation resolver.
+  /// \param LocalAddress Address in host memory to apply the fixup to
+  /// \param FinalAddress Address the containing section will have in the target
+  /// \param Value Target symbol address to apply the relocation action
+  /// \param Type Object-file-specific relocation type
+  /// \param Addend A constant addend used to compute the value to be stored
+  ///        into the relocatable field
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) = 0;
+
+  /// \brief Parses an object file relocation and stores it in Relocations or
+  ///        SymbolRelocations, depending on the object file type.
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs) = 0;
+
+ void resolveSymbols();
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
virtual ~RuntimeDyldImpl();
- virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+ bool loadObject(const MemoryBuffer *InputBuffer);
void *getSymbolAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- return SymbolTable.lookup(Name);
+ if (SymbolTable.find(Name) == SymbolTable.end())
+ return 0;
+ SymbolLoc Loc = SymbolTable.lookup(Name);
+ return getSectionAddress(Loc.first) + Loc.second;
}
void resolveRelocations();
- virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+ void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
+
+ void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress);
// Is the linker in an error state?
bool hasError() { return HasError; }
@@ -92,58 +237,7 @@ public:
StringRef getErrorString() { return ErrorStr; }
virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
-};
-
-
-class RuntimeDyldMachO : public RuntimeDyldImpl {
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- struct RelocationEntry {
- std::string Target; // Object this relocation is contained in.
- uint64_t Offset; // Offset into the object for the relocation.
- uint32_t Data; // Second word of the raw macho relocation entry.
- int64_t Addend; // Addend encoded in the instruction itself, if any.
- bool isResolved; // Has this relocation been resolved previously?
-
- RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
- : Target(t), Offset(offset), Data(data), Addend(addend),
- isResolved(false) {}
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- StringMap<RelocationList> Relocations;
-
- // FIXME: Also keep a map of all the relocations contained in an object. Use
- // this to dynamically answer whether all of the relocations in it have
- // been resolved or not.
-
- bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
-
- bool loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
-public:
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-
- static bool isKnownFormat(const MemoryBuffer *InputBuffer);
-
- bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
- return isKnownFormat(InputBuffer);
- }
};
} // end namespace llvm
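The central change in this header is that a symbol's location is now a (SectionID, Offset) pair rather than a raw pointer, so its address is always recomputed from the section's current base and relocating a section never invalidates the symbol table. A simplified illustration of that lookup, outside the class above (hypothetical container types):

    #include <stdint.h>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, uintptr_t> SymbolLoc; // (SectionID, Offset)

    // Recompute a symbol's address from its section base. Moving a section
    // only requires updating SectionBases, not every symbol stored in it.
    uint8_t *lookupSymbol(const std::map<std::string, SymbolLoc> &Symbols,
                          const std::vector<uint8_t *> &SectionBases,
                          const std::string &Name) {
      std::map<std::string, SymbolLoc>::const_iterator It = Symbols.find(Name);
      if (It == Symbols.end())
        return 0;
      return SectionBases[It->second.first] + It->second.second;
    }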
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 623e9b2acca3..1318b4454255 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -15,73 +15,147 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "RuntimeDyldImpl.h"
+#include "RuntimeDyldMachO.h"
using namespace llvm;
using namespace llvm::object;
namespace llvm {
-bool RuntimeDyldMachO::
-resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size) {
+void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ bool isPCRel = (Type >> 24) & 1;
+ unsigned MachoType = (Type >> 28) & 0xf;
+ unsigned Size = 1 << ((Type >> 25) & 3);
+
+ DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress)
+ << " FinalAddress: " << format("%p", FinalAddress)
+ << " Value: " << format("%p", Value)
+ << " Addend: " << Addend
+ << " isPCRel: " << isPCRel
+ << " MachoType: " << MachoType
+ << " Size: " << Size
+ << "\n");
+
// This just dispatches to the proper target specific routine.
- switch (CPUType) {
- default: assert(0 && "Unsupported CPU type!");
- case mach::CTM_x86_64:
- return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
- case mach::CTM_ARM:
- return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
+ switch (Arch) {
+ default: llvm_unreachable("Unsupported CPU type!");
+ case Triple::x86_64:
+ resolveX86_64Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::x86:
+ resolveI386Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ Type,
+ Size,
+ Addend);
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ }
+}
+
+bool RuntimeDyldMachO::
+resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
+ if (isPCRel)
+ Value -= FinalAddress + 4; // see resolveX86_64Relocation
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ uint8_t *p = LocalAddress;
+ uint64_t ValueToWrite = Value + Addend;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)(ValueToWrite & 0xff);
+ ValueToWrite >>= 8;
+ }
+    return false;
+  }
+ case macho::RIT_Difference:
+ case macho::RIT_Generic_LocalDifference:
+ case macho::RIT_Generic_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
}
- llvm_unreachable("");
}
bool RuntimeDyldMachO::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
+resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel)
// FIXME: It seems this value needs to be adjusted by 4 for an effective PC
// address. Is that expected? Only for branches, perhaps?
- Value -= Address + 4;
+ Value -= FinalAddress + 4;
switch(Type) {
default:
llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_Signed:
case macho::RIT_X86_64_Unsigned:
case macho::RIT_X86_64_Branch: {
+ Value += Addend;
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
}
return false;
}
- case macho::RIT_X86_64_Signed:
case macho::RIT_X86_64_GOTLoad:
case macho::RIT_X86_64_GOT:
case macho::RIT_X86_64_Subtractor:
- case macho::RIT_X86_64_Signed1:
- case macho::RIT_X86_64_Signed2:
- case macho::RIT_X86_64_Signed4:
case macho::RIT_X86_64_TLV:
return Error("Relocation type not implemented yet!");
}
- return false;
}
-bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
+bool RuntimeDyldMachO::
+resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel) {
- Value -= Address;
+ Value -= FinalAddress;
// ARM PCRel relocations have an effective-PC offset of two instructions
// (four bytes in Thumb mode, 8 bytes in ARM mode).
// FIXME: For now, assume ARM mode.
@@ -92,10 +166,9 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
default:
llvm_unreachable("Invalid relocation type!");
case macho::RIT_Vanilla: {
- llvm_unreachable("Invalid relocation type!");
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
@@ -105,7 +178,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
case macho::RIT_ARM_Branch24Bit: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t*)Address;
+ uint32_t *p = (uint32_t*)LocalAddress;
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 24 bits.
@@ -131,388 +204,84 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
return false;
}
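
The RIT_ARM_Branch24Bit case writes a PC-relative word offset into the low 24 bits of the branch instruction: the offset is taken against PC+8 (ARM mode), shifted right by two because instructions are 4-byte aligned, and merged into the existing opcode bits. A standalone sketch of that encoding, with illustrative constants:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Encode an ARM B/BL immediate: (Target - (PC + 8)) >> 2, masked to 24 bits,
// merged into the upper 8 opcode bits of the existing instruction word.
static uint32_t encodeARMBranch24(uint32_t Insn, uint64_t BranchAddr,
                                  uint64_t TargetAddr) {
  int64_t Offset = (int64_t)TargetAddr - (int64_t)(BranchAddr + 8);
  assert((Offset & 3) == 0 && "ARM branch targets are 4-byte aligned");
  Offset >>= 2;
  assert(Offset >= -(1 << 23) && Offset < (1 << 23) && "out of branch range");
  return (Insn & 0xff000000) | ((uint32_t)Offset & 0x00ffffff);
}

int main() {
  // 0xeb000000 is "bl <pc+8>"; re-target it 0x100 bytes forward.
  uint32_t Patched = encodeARMBranch24(0xeb000000, 0x1000, 0x1108);
  printf("0x%08x\n", Patched);   // 0xeb000040
  return 0;
}
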
-bool RuntimeDyldMachO::
-loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
- Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
- if (!SegmentLC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- Obj->ReadSection(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::SymbolTableEntry> STE;
- Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > SegmentLC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- // Flags == 0x8 marks a thumb function for ARM, which is fine as it
- // doesn't require any special handling here.
- if (STE->Flags != 0x0 && STE->Flags != 0x8)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
- }
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
- SegmentLC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
- }
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- // FIXME: Some targets (ARM) use internal relocations even for
- // externally visible symbols, if the definition is in the same
- // file as the reference. We need to convert those back to by-name
- // references. We can resolve the address based on the section
- // offset and see if we have a symbol at that address. If we do,
- // use that; otherwise, puke.
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
- }
- return false;
-}
-
-
-bool RuntimeDyldMachO::
-loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
- Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
- if (!Segment64LC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section64> Sect;
- Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::Symbol64TableEntry> STE;
- Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > Segment64LC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- if (STE->Flags != 0x0)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
+void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+ RelocationValueRef Value;
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+ bool isExtern = (RelType >> 27) & 1;
+ if (isExtern) {
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+    // First, look up the symbol in the object file's symbols.
+ LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data());
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+      // Second, look up the symbol in the global symbol table.
+ StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data());
+ if (gsi != SymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else
+ Value.SymbolName = TargetName.data();
}
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
- Segment64LC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ } else {
+ error_code err;
+ uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF);
+ section_iterator si = Obj.begin_sections(),
+ se = Obj.end_sections();
+ for (uint8_t i = 1; i < sectionIndex; i++) {
+ error_code err;
+ si.increment(err);
+ if (si == se)
+ break;
}
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
+ assert(si != se && "No section containing relocation!");
+ Value.SectionID = findOrEmitSection(*si, true, ObjSectionToID);
+ Value.Addend = *(const intptr_t *)Target;
+ if (Value.Addend) {
+      // The MachO addend is an offset from the current section; we need to
+      // set it as an offset from the destination section.
+ Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
}
}
- return false;
-}
-
-bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
- // If the linker is in an error state, don't do anything.
- if (hasError())
- return true;
- // Load the Mach-O wrapper object.
- std::string ErrorStr;
- OwningPtr<MachOObject> Obj(
- MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
- if (!Obj)
- return Error("unable to load object: '" + ErrorStr + "'");
-
- // Get the CPU type information from the header.
- const macho::Header &Header = Obj->getHeader();
-
- // FIXME: Error checking that the loaded object is compatible with
- // the system we're running on.
- CPUType = Header.CPUType;
- CPUSubtype = Header.CPUSubtype;
- // Validate that the load commands match what we expect.
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
- *DysymtabLCI = 0;
- for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
- const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
- switch (LCI.Command.Type) {
- case macho::LCT_Segment:
- case macho::LCT_Segment64:
- if (SegmentLCI)
- return Error("unexpected input object (multiple segments)");
- SegmentLCI = &LCI;
- break;
- case macho::LCT_Symtab:
- if (SymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- SymtabLCI = &LCI;
- break;
- case macho::LCT_Dysymtab:
- if (DysymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- DysymtabLCI = &LCI;
- break;
- default:
- return Error("unexpected input object (unexpected load command");
+ if (Arch == Triple::arm && RelType == macho::RIT_ARM_Branch24Bit) {
+    // This is an ARM branch relocation; it needs to go through a stub function.
+
+    // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end())
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + i->second,
+ RelType, 0);
+ else {
+ // Create a new stub function.
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address,
+ macho::RIT_Vanilla);
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
}
- }
-
- if (!SymtabLCI)
- return Error("no symbol table found in object");
- if (!SegmentLCI)
- return Error("no symbol table found in object");
-
- // Read and register the symbol table data.
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
- Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
- if (!SymtabLC)
- return Error("unable to load symbol table load command");
- Obj->RegisterStringTable(*SymtabLC);
-
- // Read the dynamic link-edit information, if present (not present in static
- // objects).
- if (DysymtabLCI) {
- InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
- Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
- if (!DysymtabLC)
- return Error("unable to load dynamic link-exit load command");
-
- // FIXME: We don't support anything interesting yet.
-// if (DysymtabLC->LocalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: local symbol entries");
-// if (DysymtabLC->ExternalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
- }
-
- // Load the segment load command.
- if (SegmentLCI->Command.Type == macho::LCT_Segment) {
- if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- } else {
- if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- }
-
- return false;
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
}
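
processRelocationRef routes ARM Branch24Bit relocations through a stub because a signed 24-bit word offset only reaches about +/-32MB of the branch; the branch is resolved to a nearby stub, and the stub in turn gets a Vanilla relocation that holds the full target address. A rough sketch of the range check that motivates such stubs (not the patch's code; the constants are the architectural branch range):

#include <cstdint>
#include <cstdio>

// An ARM B/BL instruction encodes a signed 24-bit word offset, i.e. roughly
// +/-32MB from PC+8. Anything further must go through a stub/veneer.
static bool inARMBranchRange(uint64_t BranchAddr, uint64_t TargetAddr) {
  int64_t Offset = (int64_t)TargetAddr - (int64_t)(BranchAddr + 8);
  return Offset >= -(int64_t)(1 << 25) && Offset < (int64_t)(1 << 25);
}

int main() {
  printf("%d\n", inARMBranchRange(0x10000, 0x20000));      // 1: direct branch
  printf("%d\n", inARMBranchRange(0x10000, 0x10000000));   // 0: needs a stub
  return 0;
}
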
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- // Assign the address in our symbol table.
- SymbolTable[Name] = Addr;
-
- RelocationList &Relocs = Relocations[Name];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- RelocationEntry &RE = Relocs[i];
- uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
- bool isPCRel = (RE.Data >> 24) & 1;
- unsigned Type = (RE.Data >> 28) & 0xf;
- unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
- DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
- << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " from '" << Name << " (" << format("%p", Addr) << ")"
- << "(" << (isPCRel ? "pcrel" : "absolute")
- << ", type: " << Type << ", Size: " << Size << ").\n");
-
- resolveRelocation(Target, Addr, isPCRel, Type, Size);
- RE.isResolved = true;
- }
-}
-bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE") return true;
if (Magic == "\xCE\xFA\xED\xFE") return true;
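
isCompatibleFormat probes the first four bytes of the buffer against the Mach-O magic numbers; only the 32-bit big- and little-endian magics are visible in this hunk. A standalone sketch of that kind of magic-byte probe (the 64-bit values below are standard Mach-O constants but are an assumption here, since they do not appear in the visible context):

#include <cstddef>
#include <cstdio>
#include <cstring>

// Return true if the buffer starts with one of the Mach-O magic numbers.
static bool looksLikeMachO(const char *Buf, size_t Len) {
  if (Len < 4)
    return false;
  static const char *const Magics[] = {
    "\xFE\xED\xFA\xCE",  // 32-bit, big-endian
    "\xCE\xFA\xED\xFE",  // 32-bit, little-endian
    "\xFE\xED\xFA\xCF",  // 64-bit, big-endian (assumed, not shown in the hunk)
    "\xCF\xFA\xED\xFE",  // 64-bit, little-endian (assumed)
  };
  for (unsigned i = 0; i < 4; ++i)
    if (memcmp(Buf, Magics[i], 4) == 0)
      return true;
  return false;
}

int main() {
  printf("%d\n", looksLikeMachO("\xCF\xFA\xED\xFE\0\0", 6));  // 1
  printf("%d\n", looksLikeMachO("\x7F" "ELF", 4));            // 0
  return 0;
}
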
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
new file mode 100644
index 000000000000..898b85190e71
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -0,0 +1,70 @@
+//===-- RuntimeDyldMachO.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_MACHO_H
+#define LLVM_RUNTIME_DYLD_MACHO_H
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Format.h"
+#include "RuntimeDyldImpl.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+
+namespace llvm {
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+protected:
+ bool resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
+
+public:
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 004b8656bf22..42364f9b70f7 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This just asks the TargetRegistry for the appropriate JIT to use, and allows
-// the user to specify a specific one on the commandline with -march=x. Clients
-// should initialize targets prior to calling createJIT.
+// This just asks the TargetRegistry for the appropriate target to use, and
+// allows the user to specify a specific one on the commandline with -march=x,
+// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to
+// calling selectTarget().
//
//===----------------------------------------------------------------------===//
@@ -21,21 +22,27 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+TargetMachine *EngineBuilder::selectTarget() {
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+ Triple TT(M->getTargetTriple());
+
+ return selectTarget(TT, MArch, MCPU, MAttrs);
+}
+
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
-TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
StringRef MArch,
StringRef MCPU,
- const SmallVectorImpl<std::string>& MAttrs,
- Reloc::Model RM,
- CodeModel::Model CM,
- std::string *ErrorStr) {
- Triple TheTriple(Mod->getTargetTriple());
+ const SmallVectorImpl<std::string>& MAttrs) {
+ Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty())
- TheTriple.setTriple(sys::getHostTriple());
+ TheTriple.setTriple(sys::getDefaultTargetTriple());
// Adjust the triple to match what the user requested.
const Target *TheTarget = 0;
@@ -55,7 +62,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
// Adjust the triple to match (if known), otherwise stick with the
- // module/host triple.
+ // requested/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
if (Type != Triple::UnknownArch)
TheTriple.setArch(Type);
@@ -69,12 +76,6 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
}
- if (!TheTarget->hasJIT()) {
- errs() << "WARNING: This target JIT is not designed for the host you are"
- << " running. If bad things happen, please choose a different "
- << "-march switch.\n";
- }
-
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (!MAttrs.empty()) {
@@ -87,7 +88,9 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Allocate a target...
TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
- RM, CM);
+ Options,
+ RelocModel, CMModel,
+ OptLevel);
assert(Target && "Could not allocate target machine!");
return Target;
}
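
With this change, EngineBuilder::selectTarget() derives the triple from the module (falling back to the default target triple) and builds the TargetMachine with the builder's TargetOptions, relocation/code models, and optimization level. A hedged sketch of how a JIT client typically reaches this code, via EngineBuilder::create(), which calls selectTarget() internally; the headers and calls reflect LLVM of this vintage and are assumptions, not part of the patch:

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/Support/TargetSelect.h"
#include <string>
#include <cstdio>

using namespace llvm;

int main() {
  InitializeNativeTarget();                  // register the host target

  LLVMContext Context;
  Module *M = new Module("jit-module", Context);

  std::string Err;
  ExecutionEngine *EE = EngineBuilder(M)
                            .setErrorStr(&Err)
                            .setOptLevel(CodeGenOpt::Default)
                            .create();       // calls selectTarget() internally
  if (!EE) {
    fprintf(stderr, "failed to create ExecutionEngine: %s\n", Err.c_str());
    return 1;
  }
  delete EE;                                 // also deletes the module
  return 0;
}
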
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
new file mode 100644
index 000000000000..e22b8cd406b2
--- /dev/null
+++ b/lib/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/LLVMBuild.txt --------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+
+[component_0]
+type = Group
+name = Libraries
+parent = $ROOT
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 4d8824bfcb3f..0b6d2f4218e3 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -4,11 +4,3 @@ add_llvm_library(LLVMLinker
LinkModules.cpp
Linker.cpp
)
-
-add_llvm_library_dependencies(LLVMLinker
- LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt
new file mode 100644
index 000000000000..2b4c232b8067
--- /dev/null
+++ b/lib/Linker/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Linker/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Linker
+parent = Libraries
+required_libraries = Archive BitReader Core Support TransformUtils
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index 2c4ed7fdc17a..c16d1958cdfb 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -16,7 +16,6 @@
#include "llvm/Module.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/Archive.h"
-#include "llvm/Config/config.h"
#include <memory>
#include <set>
using namespace llvm;
@@ -141,7 +140,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
// Find the modules we need to link into the target module. Note that arch
// keeps ownership of these modules and may return the same Module* from a
// subsequent call.
- std::set<Module*> Modules;
+ SmallVector<Module*, 16> Modules;
if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
return error("Cannot find symbols in '" + Filename.str() +
"': " + ErrMsg);
@@ -158,7 +157,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
UndefinedSymbols.end());
// Loop over all the Modules that we got back from the archive
- for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end();
+ for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end();
I != E; ++I) {
// Get the module we must link in.
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 03a962e3be5d..765fcc88235b 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -16,11 +16,16 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -38,11 +43,16 @@ class TypeMapTy : public ValueMapTypeRemapper {
/// case we need to roll back.
SmallVector<Type*, 16> SpeculativeTypes;
- /// DefinitionsToResolve - This is a list of non-opaque structs in the source
- /// module that are mapped to an opaque struct in the destination module.
- SmallVector<StructType*, 16> DefinitionsToResolve;
-public:
+ /// SrcDefinitionsToResolve - This is a list of non-opaque structs in the
+ /// source module that are mapped to an opaque struct in the destination
+ /// module.
+ SmallVector<StructType*, 16> SrcDefinitionsToResolve;
+ /// DstResolvedOpaqueTypes - This is the set of opaque types in the
+ /// destination modules who are getting a body from the source module.
+ SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
+
+public:
/// addTypeMapping - Indicate that the specified type in the destination
/// module is conceptually equivalent to the specified type in the source
/// module.
@@ -58,6 +68,18 @@ public:
FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+ /// dump - Dump out the type map for debugging purposes.
+ void dump() const {
+ for (DenseMap<Type*, Type*>::const_iterator
+ I = MappedTypes.begin(), E = MappedTypes.end(); I != E; ++I) {
+ dbgs() << "TypeMap: ";
+ I->first->dump();
+ dbgs() << " => ";
+ I->second->dump();
+ dbgs() << '\n';
+ }
+ }
+
private:
Type *getImpl(Type *T);
/// remapType - Implement the ValueMapTypeRemapper interface.
@@ -118,11 +140,17 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
return true;
}
- // Mapping a non-opaque source type to an opaque dest. Keep the dest, but
- // fill it in later. This doesn't need to be speculative.
+ // Mapping a non-opaque source type to an opaque dest. If this is the first
+ // type that we're mapping onto this destination type then we succeed. Keep
+ // the dest, but fill it in later. This doesn't need to be speculative. If
+ // this is the second (different) type that we're trying to map onto the
+ // same opaque type then we fail.
if (cast<StructType>(DstTy)->isOpaque()) {
+ // We can only map one source type onto the opaque destination type.
+ if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)))
+ return false;
+ SrcDefinitionsToResolve.push_back(SSTy);
Entry = DstTy;
- DefinitionsToResolve.push_back(SSTy);
return true;
}
}
@@ -137,6 +165,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
return false;
+
} else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
return false;
@@ -174,9 +203,9 @@ void TypeMapTy::linkDefinedTypeBodies() {
SmallString<16> TmpName;
// Note that processing entries in this loop (calling 'get') can add new
- // entries to the DefinitionsToResolve vector.
- while (!DefinitionsToResolve.empty()) {
- StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+ // entries to the SrcDefinitionsToResolve vector.
+ while (!SrcDefinitionsToResolve.empty()) {
+ StructType *SrcSTy = SrcDefinitionsToResolve.pop_back_val();
StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
// TypeMap is a many-to-one mapping, if there were multiple types that
@@ -204,16 +233,17 @@ void TypeMapTy::linkDefinedTypeBodies() {
TmpName.clear();
}
}
+
+ DstResolvedOpaqueTypes.clear();
}
-
/// get - Return the mapped type to use for the specified input type from the
/// source module.
Type *TypeMapTy::get(Type *Ty) {
Type *Result = getImpl(Ty);
// If this caused a reference to any struct type, resolve it before returning.
- if (!DefinitionsToResolve.empty())
+ if (!SrcDefinitionsToResolve.empty())
linkDefinedTypeBodies();
return Result;
}
@@ -252,7 +282,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// Otherwise, rebuild a modified type.
switch (Ty->getTypeID()) {
- default: assert(0 && "unknown derived type to remap");
+ default: llvm_unreachable("unknown derived type to remap");
case Type::ArrayTyID:
return *Entry = ArrayType::get(ElementTypes[0],
cast<ArrayType>(Ty)->getNumElements());
@@ -304,12 +334,12 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// Otherwise we create a new type and resolve its body later. This will be
// resolved by the top level of get().
- DefinitionsToResolve.push_back(STy);
- return *Entry = StructType::create(STy->getContext());
+ SrcDefinitionsToResolve.push_back(STy);
+ StructType *DTy = StructType::create(STy->getContext());
+ DstResolvedOpaqueTypes.insert(DTy);
+ return *Entry = DTy;
}
-
-
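
For named structs, getImpl creates an opaque destination type immediately and defers filling in its body to linkDefinedTypeBodies(), which is what lets recursive and mutually referencing struct types be mapped without infinite recursion. A standalone sketch of the underlying create-opaque-then-setBody pattern (illustrative; not code from the patch):

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include <cstdio>

using namespace llvm;

int main() {
  LLVMContext Ctx;

  // Forward-declare the struct with no body (opaque), so other types can
  // already refer to it.
  StructType *Node = StructType::create(Ctx, "Node");
  PointerType *NodePtr = PointerType::getUnqual(Node);

  // Later, once the referenced types are known, resolve the body in place.
  Type *Fields[] = { Type::getInt32Ty(Ctx), NodePtr };
  Node->setBody(Fields, /*isPacked=*/false);

  printf("opaque after setBody? %d\n", Node->isOpaque());   // 0
  return 0;
}
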
//===----------------------------------------------------------------------===//
// ModuleLinker implementation.
//===----------------------------------------------------------------------===//
@@ -341,6 +371,9 @@ namespace {
// Set of items not to link in from source.
SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
+ // Vector of functions to lazily link in.
+ std::vector<Function*> LazilyLinkFunctions;
+
public:
std::string ErrorMsg;
@@ -360,7 +393,9 @@ namespace {
/// getLinkageResult - This analyzes the two global values and determines
/// what the result will look like in the destination module.
bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
+ bool &LinkFromSrc);
/// getLinkedToGlobal - Given a global in the source module, return the
/// global in the destination module that is being linked to, if any.
@@ -384,11 +419,19 @@ namespace {
}
void computeTypeMapping();
+ bool categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
+ DenseMap<MDString*, MDNode*> &ErrorNode,
+ DenseMap<MDString*, MDNode*> &WarningNode,
+ DenseMap<MDString*, MDNode*> &OverrideNode,
+ DenseMap<MDString*,
+ SmallSetVector<MDNode*, 8> > &RequireNodes,
+ SmallSetVector<MDString*, 16> &SeenIDs);
bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
bool linkGlobalProto(GlobalVariable *SrcGV);
bool linkFunctionProto(Function *SrcF);
bool linkAliasProto(GlobalAlias *SrcA);
+ bool linkModuleFlagsMetadata();
void linkAppendingVarInit(const AppendingVarInfo &AVI);
void linkGlobalInits();
@@ -398,8 +441,6 @@ namespace {
};
}
-
-
/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table. This is good for all clients except for us. Go
/// through the trouble to force this back.
@@ -421,9 +462,9 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) {
}
}
-/// CopyGVAttributes - copy additional attributes (those not needed to construct
+/// copyGVAttributes - copy additional attributes (those not needed to construct
/// a GlobalValue) from the SrcGV to the DestGV.
-static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
// Use the maximum alignment, rather than just copying the alignment of SrcGV.
unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
DestGV->copyAttributesFrom(SrcGV);
@@ -432,21 +473,33 @@ static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
forceRenaming(DestGV, SrcGV->getName());
}
+static bool isLessConstraining(GlobalValue::VisibilityTypes a,
+ GlobalValue::VisibilityTypes b) {
+ if (a == GlobalValue::HiddenVisibility)
+ return false;
+ if (b == GlobalValue::HiddenVisibility)
+ return true;
+ if (a == GlobalValue::ProtectedVisibility)
+ return false;
+ if (b == GlobalValue::ProtectedVisibility)
+ return true;
+ return false;
+}
+
/// getLinkageResult - This analyzes the two global values and determines what
/// the result will look like in the destination module. In particular, it
-/// computes the resultant linkage type, computes whether the global in the
-/// source should be copied over to the destination (replacing the existing
-/// one), and computes whether this linkage is an error or not. It also performs
-/// visibility checks: we cannot link together two symbols with different
-/// visibilities.
+/// computes the resultant linkage type and visibility, computes whether the
+/// global in the source should be copied over to the destination (replacing
+/// the existing one), and computes whether this linkage is an error or not.
bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT,
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
bool &LinkFromSrc) {
assert(Dest && "Must have two globals being queried");
assert(!Src->hasLocalLinkage() &&
"If Src has internal linkage, Dest shouldn't be set!");
- bool SrcIsDeclaration = Src->isDeclaration();
+ bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable();
bool DestIsDeclaration = Dest->isDeclaration();
if (SrcIsDeclaration) {
@@ -502,13 +555,10 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
"': symbol multiply defined!");
}
- // Check visibility
- if (Src->getVisibility() != Dest->getVisibility() &&
- !SrcIsDeclaration && !DestIsDeclaration &&
- !Src->hasAvailableExternallyLinkage() &&
- !Dest->hasAvailableExternallyLinkage())
- return emitError("Linking globals named '" + Src->getName() +
- "': symbols have different visibilities!");
+ // Compute the visibility. We follow the rules in the System V Application
+ // Binary Interface.
+ Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ?
+ Dest->getVisibility() : Src->getVisibility();
return false;
}
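
isLessConstraining orders visibilities default < protected < hidden, and getLinkageResult now keeps the more constraining of the two sides instead of rejecting mismatches, following the System V ABI rules. A tiny standalone illustration of that merge (local enum, not GlobalValue::VisibilityTypes):

#include <cstdio>

// Same ordering the linker uses: hidden is the most constraining visibility,
// protected next, default the least.
enum Visibility { Default, Protected, Hidden };

static bool isLessConstraining(Visibility a, Visibility b) {
  if (a == Hidden) return false;
  if (b == Hidden) return true;
  if (a == Protected) return false;
  if (b == Protected) return true;
  return false;   // both default
}

static Visibility mergeVisibility(Visibility Src, Visibility Dst) {
  // Keep whichever side is more constraining.
  return isLessConstraining(Src, Dst) ? Dst : Src;
}

int main() {
  printf("%d\n", mergeVisibility(Default, Hidden));     // 2: hidden wins
  printf("%d\n", mergeVisibility(Protected, Default));  // 1: protected wins
  return 0;
}
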
@@ -539,7 +589,54 @@ void ModuleLinker::computeTypeMapping() {
if (GlobalValue *DGV = getLinkedToGlobal(I))
TypeMap.addTypeMapping(DGV->getType(), I->getType());
}
-
+
+ // Incorporate types by name, scanning all the types in the source module.
+ // At this point, the destination module may have a type "%foo = { i32 }" for
+ // example. When the source module got loaded into the same LLVMContext, if
+ // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+ std::vector<StructType*> SrcStructTypes;
+ SrcM->findUsedStructTypes(SrcStructTypes);
+ SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
+ SrcStructTypes.end());
+
+ std::vector<StructType*> DstStructTypes;
+ DstM->findUsedStructTypes(DstStructTypes);
+ SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
+ DstStructTypes.end());
+
+ for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
+ StructType *ST = SrcStructTypes[i];
+ if (!ST->hasName()) continue;
+
+ // Check to see if there is a dot in the name followed by a digit.
+ size_t DotPos = ST->getName().rfind('.');
+ if (DotPos == 0 || DotPos == StringRef::npos ||
+ ST->getName().back() == '.' || !isdigit(ST->getName()[DotPos+1]))
+ continue;
+
+ // Check to see if the destination module has a struct with the prefix name.
+ if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)))
+ // Don't use it if this actually came from the source module. They're in
+ // the same LLVMContext after all. Also don't use it unless the type is
+ // actually used in the destination module. This can happen in situations
+ // like this:
+ //
+ // Module A Module B
+ // -------- --------
+ // %Z = type { %A } %B = type { %C.1 }
+ // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* }
+ // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] }
+ // %C = type { i8* } %B.3 = type { %C.1 }
+ //
+ // When we link Module B with Module A, the '%B' in Module B is
+ // used. However, that would then use '%C.1'. But when we process '%C.1',
+ // we prefer to take the '%C' version. So we are then left with both
+ // '%C.1' and '%C' being used for the same types. This leads to some
+ // variables using one type and some using the other.
+ if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST))
+ TypeMap.addTypeMapping(DST, ST);
+ }
+
// Don't bother incorporating aliases, they aren't generally typed well.
// Now that we have discovered all of the type equivalences, get a body for
@@ -590,7 +687,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
DstGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
- CopyGVAttributes(NG, DstGV);
+ copyGVAttributes(NG, DstGV);
AppendingVarInfo AVI;
AVI.NewGV = NG;
@@ -615,6 +712,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
/// merge them into the dest module.
bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
GlobalValue *DGV = getLinkedToGlobal(SGV);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
if (DGV) {
// Concatenation of appending linkage variables is magic and handled later.
@@ -624,9 +722,11 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
// Determine whether linkage of these two globals follows the source
// module's definition or the destination module's definition.
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
+ if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
return true;
+ NewVisibility = NV;
// If we're not linking from the source, then keep the definition that we
// have.
@@ -636,9 +736,10 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
DGVar->setConstant(true);
- // Set calculated linkage.
+ // Set calculated linkage and visibility.
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
@@ -660,7 +761,9 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
SGV->isThreadLocal(),
SGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
- CopyGVAttributes(NewDGV, SGV);
+ copyGVAttributes(NewDGV, SGV);
+ if (NewVisibility)
+ NewDGV->setVisibility(*NewVisibility);
if (DGV) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
@@ -676,17 +779,21 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
/// destination module if needed, setting up mapping information.
bool ModuleLinker::linkFunctionProto(Function *SF) {
GlobalValue *DGV = getLinkedToGlobal(SF);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
return true;
-
+ NewVisibility = NV;
+
if (!LinkFromSrc) {
// Set calculated linkage
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
@@ -702,12 +809,21 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
// bring SF over.
Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
SF->getLinkage(), SF->getName(), DstM);
- CopyGVAttributes(NewDF, SF);
+ copyGVAttributes(NewDF, SF);
+ if (NewVisibility)
+ NewDF->setVisibility(*NewVisibility);
if (DGV) {
// Any uses of DF need to change to NewDF, with cast.
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
DGV->eraseFromParent();
+ } else {
+    // Local, linkonce, or available_externally linkage: add to the set to
+    // skip for now and link in lazily if the function ends up being used.
+ if (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
+ SF->hasAvailableExternallyLinkage()) {
+ DoNotLinkFromSource.insert(SF);
+ LazilyLinkFunctions.push_back(SF);
+ }
}
ValueMap[SF] = NewDF;
@@ -718,17 +834,21 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
/// source module.
bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
GlobalValue *DGV = getLinkedToGlobal(SGA);
-
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
+ if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
return true;
-
+ NewVisibility = NV;
+
if (!LinkFromSrc) {
// Set calculated linkage.
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
@@ -744,7 +864,9 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
SGA->getLinkage(), SGA->getName(),
/*aliasee*/0, DstM);
- CopyGVAttributes(NewDA, SGA);
+ copyGVAttributes(NewDA, SGA);
+ if (NewVisibility)
+ NewDA->setVisibility(*NewVisibility);
if (DGV) {
// Any uses of DGV need to change to NewDA, with cast.
@@ -756,36 +878,27 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
return false;
}
+static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
+ unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+ for (unsigned i = 0; i != NumElements; ++i)
+ Dest.push_back(C->getAggregateElement(i));
+}
+
void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
// Merge the initializer.
SmallVector<Constant*, 16> Elements;
- if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- Elements.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(AVI.DstInit));
- ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
- Type *EltTy = DstAT->getElementType();
- Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
- }
+ getArrayElements(AVI.DstInit, Elements);
Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
- if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- Elements.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(SrcInit));
- ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
- Type *EltTy = SrcAT->getElementType();
- Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
- }
+ getArrayElements(SrcInit, Elements);
+
ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
}
-
-// linkGlobalInits - Update the initializers in the Dest module now that all
-// globals that may be referenced are in Dest.
+/// linkGlobalInits - Update the initializers in the Dest module now that all
+/// globals that may be referenced are in Dest.
void ModuleLinker::linkGlobalInits() {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = SrcM->global_begin(),
@@ -802,9 +915,9 @@ void ModuleLinker::linkGlobalInits() {
}
}
-// linkFunctionBody - Copy the source function over into the dest function and
-// fix up references to values. At this point we know that Dest is an external
-// function, and that Src is not.
+/// linkFunctionBody - Copy the source function over into the dest function and
+/// fix up references to values. At this point we know that Dest is an external
+/// function, and that Src is not.
void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
@@ -833,7 +946,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
} else {
// Clone the body of the function into the dest function.
SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
- CloneFunctionInto(Dst, Src, ValueMap, false, Returns);
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap);
}
// There is no need to map the arguments anymore.
@@ -843,7 +956,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
}
-
+/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
void ModuleLinker::linkAliasBodies() {
for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
I != E; ++I) {
@@ -856,11 +969,14 @@ void ModuleLinker::linkAliasBodies() {
}
}
-/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest
/// module.
void ModuleLinker::linkNamedMDNodes() {
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
E = SrcM->named_metadata_end(); I != E; ++I) {
+ // Don't link module flags here. Do them separately.
+ if (&*I == SrcModFlags) continue;
NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
// Add Src elements into Dest node.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -868,10 +984,176 @@ void ModuleLinker::linkNamedMDNodes() {
RF_None, &TypeMap));
}
}
+
+/// categorizeModuleFlagNodes - Categorize the module flags according to their
+/// type: Error, Warning, Override, and Require.
+bool ModuleLinker::
+categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
+ DenseMap<MDString*, MDNode*> &ErrorNode,
+ DenseMap<MDString*, MDNode*> &WarningNode,
+ DenseMap<MDString*, MDNode*> &OverrideNode,
+ DenseMap<MDString*,
+ SmallSetVector<MDNode*, 8> > &RequireNodes,
+ SmallSetVector<MDString*, 16> &SeenIDs) {
+ bool HasErr = false;
+
+ for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = ModFlags->getOperand(I);
+ assert(Op->getNumOperands() == 3 && "Invalid module flag metadata!");
+ assert(isa<ConstantInt>(Op->getOperand(0)) &&
+ "Module flag's first operand must be an integer!");
+ assert(isa<MDString>(Op->getOperand(1)) &&
+ "Module flag's second operand must be an MDString!");
+
+ ConstantInt *Behavior = cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = cast<MDString>(Op->getOperand(1));
+ Value *Val = Op->getOperand(2);
+ switch (Behavior->getZExtValue()) {
+ default:
+ assert(false && "Invalid behavior in module flag metadata!");
+ break;
+ case Module::Error: {
+ MDNode *&ErrNode = ErrorNode[ID];
+ if (!ErrNode) ErrNode = Op;
+ if (ErrNode->getOperand(2) != Val)
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ break;
+ }
+ case Module::Warning: {
+ MDNode *&WarnNode = WarningNode[ID];
+ if (!WarnNode) WarnNode = Op;
+ if (WarnNode->getOperand(2) != Val)
+        errs() << "WARNING: linking module flags '" << ID->getString()
+               << "': IDs have conflicting values\n";
+ break;
+ }
+ case Module::Require: RequireNodes[ID].insert(Op); break;
+ case Module::Override: {
+ MDNode *&OvrNode = OverrideNode[ID];
+ if (!OvrNode) OvrNode = Op;
+ if (OvrNode->getOperand(2) != Val)
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting override values");
+ break;
+ }
+ }
+
+ SeenIDs.insert(ID);
+ }
+
+ return HasErr;
+}
+
+/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
+/// module.
+bool ModuleLinker::linkModuleFlagsMetadata() {
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
+ if (!SrcModFlags) return false;
+
+ NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
+
+ // If the destination module doesn't have module flags yet, then just copy
+ // over the source module's flags.
+ if (DstModFlags->getNumOperands() == 0) {
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+ DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+ return false;
+ }
+
+ bool HasErr = false;
+
+ // Otherwise, we have to merge them based on their behaviors. First,
+ // categorize all of the nodes in the modules' module flags. If an error or
+ // warning occurs, then emit the appropriate message(s).
+ DenseMap<MDString*, MDNode*> ErrorNode;
+ DenseMap<MDString*, MDNode*> WarningNode;
+ DenseMap<MDString*, MDNode*> OverrideNode;
+ DenseMap<MDString*, SmallSetVector<MDNode*, 8> > RequireNodes;
+ SmallSetVector<MDString*, 16> SeenIDs;
+
+ HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode,
+ OverrideNode, RequireNodes, SeenIDs);
+ HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode,
+ OverrideNode, RequireNodes, SeenIDs);
+
+ // Check that there isn't both an error and warning node for a flag.
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ if (ErrorNode[ID] && WarningNode[ID])
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors");
+ }
+
+ // Early exit if we had an error.
+ if (HasErr) return true;
+
+ // Get the destination's module flags ready for new operands.
+ DstModFlags->dropAllReferences();
+
+ // Add all of the module flags to the destination module.
+ DenseMap<MDString*, SmallVector<MDNode*, 4> > AddedNodes;
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ if (OverrideNode[ID]) {
+ DstModFlags->addOperand(OverrideNode[ID]);
+ AddedNodes[ID].push_back(OverrideNode[ID]);
+ } else if (ErrorNode[ID]) {
+ DstModFlags->addOperand(ErrorNode[ID]);
+ AddedNodes[ID].push_back(ErrorNode[ID]);
+ } else if (WarningNode[ID]) {
+ DstModFlags->addOperand(WarningNode[ID]);
+ AddedNodes[ID].push_back(WarningNode[ID]);
+ }
+
+ for (SmallSetVector<MDNode*, 8>::iterator
+ II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end();
+ II != IE; ++II)
+ DstModFlags->addOperand(*II);
+ }
+
+ // Now check that all of the requirements have been satisfied.
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ SmallSetVector<MDNode*, 8> &Set = RequireNodes[ID];
+
+ for (SmallSetVector<MDNode*, 8>::iterator
+ II = Set.begin(), IE = Set.end(); II != IE; ++II) {
+ MDNode *Node = *II;
+ assert(isa<MDNode>(Node->getOperand(2)) &&
+ "Module flag's third operand must be an MDNode!");
+ MDNode *Val = cast<MDNode>(Node->getOperand(2));
+
+ MDString *ReqID = cast<MDString>(Val->getOperand(0));
+ Value *ReqVal = Val->getOperand(1);
+
+ bool HasValue = false;
+ for (SmallVectorImpl<MDNode*>::iterator
+ RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end();
+ RI != RE; ++RI) {
+ MDNode *ReqNode = *RI;
+ if (ReqNode->getOperand(2) == ReqVal) {
+ HasValue = true;
+ break;
+ }
+ }
+
+ if (!HasValue)
+ HasErr = emitError("linking module flags '" + ReqID->getString() +
+ "': does not have the required value");
+ }
+ }
+
+ return HasErr;
+}
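
linkModuleFlagsMetadata merges the !llvm.module.flags named metadata by behavior: Error flags must agree across modules, Warning flags only diagnose mismatches, Override flags win outright, and Require flags assert that the merged output carries a given ID/value pair. A hedged sketch of producing flags that this code would then merge; Module::addModuleFlag is the convenience API of roughly this era, but its exact signature is an assumption and the flag names below are made up:

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Constants.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("flags-demo", Ctx);

  // Behavior Module::Error: linking in a module whose value for this
  // (hypothetical) ID differs would make linkModuleFlagsMetadata() fail.
  M.addModuleFlag(Module::Error, "my-frontend-abi-version",
                  ConstantInt::get(Type::getInt32Ty(Ctx), 1));

  // Behavior Module::Warning: a mismatch only produces a diagnostic.
  M.addModuleFlag(Module::Warning, "my-frontend-tool-version",
                  ConstantInt::get(Type::getInt32Ty(Ctx), 7));

  M.print(outs(), 0);   // dumps the !llvm.module.flags named metadata
  return 0;
}
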
bool ModuleLinker::run() {
- assert(DstM && "Null Destination module");
- assert(SrcM && "Null Source Module");
+ assert(DstM && "Null destination module");
+ assert(SrcM && "Null source module");
// Inherit the target data from the source module if the destination module
// doesn't have one already.
@@ -951,7 +1233,6 @@ bool ModuleLinker::run() {
// Link in the function bodies that are defined in the source module into
// DstM.
for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
-
// Skip if not linking from source.
if (DoNotLinkFromSource.count(SF)) continue;
@@ -964,16 +1245,70 @@ bool ModuleLinker::run() {
}
linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ SF->Dematerialize();
}
// Resolve all uses of aliases with aliasees.
linkAliasBodies();
- // Remap all of the named mdnoes in Src into the DstM module. We do this
+ // Remap all of the named MDNodes in Src into the DstM module. We do this
// after linking GlobalValues so that MDNodes that reference GlobalValues
// are properly remapped.
linkNamedMDNodes();
+ // Merge the module flags into the DstM module.
+ if (linkModuleFlagsMetadata())
+ return true;
+
+ // Process vector of lazily linked in functions.
+ bool LinkedInAnyFunctions;
+ do {
+ LinkedInAnyFunctions = false;
+
+ for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+
+ if (!DF->use_empty()) {
+
+ // Materialize if necessary.
+ if (SF->isDeclaration()) {
+ if (!SF->isMaterializable())
+ continue;
+ if (SF->Materialize(&ErrorMsg))
+ return true;
+ }
+
+ // Link in function body.
+ linkFunctionBody(DF, SF);
+ SF->Dematerialize();
+
+ // "Remove" from vector by setting the element to 0.
+ *I = 0;
+
+ // Set flag to indicate we may have more functions to lazily link in
+ // since we linked in a function.
+ LinkedInAnyFunctions = true;
+ }
+ }
+ } while (LinkedInAnyFunctions);
+
+ // Remove any prototypes of functions that were not actually linked in.
+ for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+ if (DF->use_empty())
+ DF->eraseFromParent();
+ }
+
// Now that all of the types from the source are used, resolve any structs
// copied over to the dest that didn't exist there.
TypeMap.linkDefinedTypeBodies();
@@ -985,11 +1320,11 @@ bool ModuleLinker::run() {
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
-// LinkModules - This function links two modules together, with the resulting
-// left module modified to be the composite of the two input modules. If an
-// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
-// the problem. Upon failure, the Dest module could be in a modified state, and
-// shouldn't be relied on to be consistent.
+/// LinkModules - This function links two modules together, with the resulting
+/// left module modified to be the composite of the two input modules. If an
+/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+/// the problem. Upon failure, the Dest module could be in a modified state,
+/// and shouldn't be relied on to be consistent.
bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
std::string *ErrorMsg) {
ModuleLinker TheLinker(Dest, Src, Mode);
@@ -997,6 +1332,6 @@ bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
return true;
}
-
+
return false;
}
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 59fbceb6a308..7c6cf4f3dd78 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a4ac1bf60529..f11e686fd104 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -20,7 +20,6 @@ add_llvm_library(LLVMMC
MCInstPrinter.cpp
MCInstrAnalysis.cpp
MCLabel.cpp
- MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCMachObjectTargetWriter.cpp
MCModule.cpp
@@ -45,10 +44,5 @@ add_llvm_library(LLVMMC
WinCOFFStreamer.cpp
)
-add_llvm_library_dependencies(LLVMMC
- LLVMObject
- LLVMSupport
- )
-
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 3d16de5604f6..9fc33b6b3e5e 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,26 +11,26 @@
//
//===----------------------------------------------------------------------===//
-#include "ELFObjectWriter.h"
+#include "MCELF.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-
-#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
-#include "../Target/ARM/MCTargetDesc/ARMFixupKinds.h"
-#include "../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h"
#include <vector>
using namespace llvm;
@@ -38,6 +38,304 @@ using namespace llvm;
#undef DEBUG_TYPE
#define DEBUG_TYPE "reloc-info"
+namespace {
+class ELFObjectWriter : public MCObjectWriter {
+ protected:
+
+ static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+ static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed);
+ static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc);
+ static bool IsELFMetaDataSection(const MCSectionData &SD);
+ static uint64_t DataSectionSize(const MCSectionData &SD);
+ static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+ static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section);
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+ /// ELFSymbolData - Helper struct for containing some precomputed
+ /// information on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+  /// The target-specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ bool NeedsGOT;
+
+ bool NeedsSymtabShndx;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
+
+ unsigned ShstrtabIndex;
+
+
+ const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ // TargetObjectWriter wrappers.
+ const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return TargetObjectWriter->ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+ unsigned getEFlags() const {
+ return TargetObjectWriter->getEFlags();
+ }
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return TargetObjectWriter->GetRelocType(Target, Fixup, IsPCRel,
+ IsRelocWithSymbol, Addend);
+ }
+
+
+ public:
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+ NeedsGOT(false), NeedsSymtabShndx(false){
+ }
+
+ virtual ~ELFObjectWriter();
+
+ void WriteWord(uint64_t W) {
+ if (is64Bit())
+ Write64(W);
+ else
+ Write32(W);
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+  void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents() += StringRef(buf, 1);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ F.getContents() += StringRef(buf, 2);
+ }
+
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ F.getContents() += StringRef(buf, 4);
+ }
+
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ F.getContents() += StringRef(buf, 8);
+ }
+
+ void WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
+
+ void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ void WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
+
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+ // Map from a section to the section with the relocations
+ typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+ // Map from a section to its offset
+ typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param StringTable [out] - The string table data.
+ /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+ /// string table.
+ void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections);
+
+ void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ RelMapTy &RelMap);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap);
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap);
+
+ void ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections);
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual bool
+ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+ };
+}
+
bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
const MCFixupKindInfo &FKI =
Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
@@ -92,11 +390,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
// e_ident[EI_OSABI]
- switch (TargetObjectWriter->getOSType()) {
- case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break;
- case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break;
- default: Write8(ELF::ELFOSABI_NONE); break;
- }
+ Write8(TargetObjectWriter->getOSABI());
Write8(0); // e_ident[EI_ABIVERSION]
WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
@@ -112,7 +406,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
- WriteEFlags();
+ Write32(getEFlags());
// e_ehsize = ELF header size
Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
@@ -181,7 +475,7 @@ uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
if (const MCExpr *Value = Symbol.getVariableValue()) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, Layout))
- return (uint64_t)IntValue;
+ return (uint64_t)IntValue;
}
}
@@ -277,7 +571,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
MCDataFragment *ShndxF,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap) {
+ const SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
assert(StringTable.size() && "Missing string table");
@@ -306,7 +600,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
Section.getType() == ELF::SHT_SYMTAB_SHNDX)
continue;
WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
- ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section),
+ false);
LastLocalSymbolIndex++;
}
@@ -416,7 +711,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// Offset of the symbol in the section
int64_t a = Layout.getSymbolOffset(&SDB);
- // Ofeset of the relocation in the section
+ // Offset of the relocation in the section
int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
Value += b - a;
}
@@ -445,11 +740,16 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
FixedValue = Value;
unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
(RelocSymbol != 0), Addend);
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
Fixup.getOffset();
- adjustFixupOffset(Fixup, RelocOffset);
+ // FIXME: no tests cover this. Is adjustFixupOffset dead code?
+ TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset);
if (!hasRelocationAddend())
Addend = 0;
@@ -459,7 +759,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
else
assert(isInt<32>(Addend));
- ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend, Fixup);
Relocations[Fragment->getParent()].push_back(ERE);
}
@@ -745,8 +1045,10 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
MCDataFragment *F,
const MCSectionData *SD) {
std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
- // sort by the r_offset just like gnu as does
- array_pod_sort(Relocs.begin(), Relocs.end());
+
+ // Sort the relocation entries. Most targets just sort by r_offset, but some
+ // (e.g., MIPS) have additional constraints.
+ TargetObjectWriter->sortRelocs(Asm, Relocs);
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
ELFRelocationEntry entry = Relocs[e - i - 1];
@@ -1053,14 +1355,10 @@ uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCSectionELF &Section) {
- uint64_t FileOff = OS.tell();
const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
- uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+ uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
WriteZeros(Padding);
- FileOff += Padding;
-
- FileOff += GetSectionFileSize(Layout, SD);
if (IsELFMetaDataSection(SD)) {
for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
@@ -1070,7 +1368,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
WriteBytes(cast<MCDataFragment>(F).getContents().str());
}
} else {
- Asm.WriteSectionData(&SD, Layout);
+ Asm.writeSectionData(&SD, Layout);
}
}
@@ -1226,16 +1524,13 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
for (unsigned i = 0; i < NumRegularSections + 1; ++i)
WriteDataSectionData(Asm, Layout, *Sections[i]);
- FileOff = OS.tell();
- uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+ uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
WriteZeros(Padding);
// ... then the section header table ...
WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
SectionOffsetMap);
- FileOff = OS.tell();
-
// ... and then the remaining sections ...
for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
WriteDataSectionData(Asm, Layout, *Sections[i]);
@@ -1256,577 +1551,5 @@ ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_ostream &OS,
bool IsLittleEndian) {
- switch (MOTW->getEMachine()) {
- case ELF::EM_386:
- case ELF::EM_X86_64:
- return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_ARM:
- return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_MBLAZE:
- return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_PPC:
- case ELF::EM_PPC64:
- return new PPCELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_MIPS:
- return new MipsELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- default: llvm_unreachable("Unsupported architecture"); break;
- }
-}
-
-
-/// START OF SUBCLASSES for ELFObjectWriter
-//===- ARMELFObjectWriter -------------------------------------------===//
-
-ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
-{}
-
-ARMELFObjectWriter::~ARMELFObjectWriter()
-{}
-
-// FIXME: get the real EABI Version from the Triple.
-void ARMELFObjectWriter::WriteEFlags() {
- Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
-}
-
-// In ARM, _MergedGlobals and other most symbols get emitted directly.
-// I.e. not as an offset to a section symbol.
-// This code is an approximation of what ARM/gcc does.
-
-STATISTIC(PCRelCount, "Total number of PIC Relocations");
-STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
-
-const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- const MCSymbol &Symbol = Target.getSymA()->getSymbol();
- bool EmitThisSym = false;
-
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(Symbol.getSection());
- bool InNormalSection = true;
- unsigned RelocType = 0;
- RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
-
- DEBUG(
- const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
- MCSymbolRefExpr::VariantKind Kind2;
- Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
- MCSymbolRefExpr::VK_None;
- dbgs() << "considering symbol "
- << Section.getSectionName() << "/"
- << Symbol.getName() << "/"
- << " Rel:" << (unsigned)RelocType
- << " Kind: " << (int)Kind << "/" << (int)Kind2
- << " Tmp:"
- << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
- << Symbol.isVariable() << "/" << Symbol.isTemporary()
- << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
-
- if (IsPCRel) { ++PCRelCount;
- switch (RelocType) {
- default:
- // Most relocation types are emitted as explicit symbols
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".data.rel", false)
- .Case(".bss", false)
- .Default(true);
- EmitThisSym = true;
- break;
- case ELF::R_ARM_ABS32:
- // But things get strange with R_ARM_ABS32
- // In this case, most things that go in .rodata show up
- // as section relative relocations
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".data.rel", false)
- .Case(".rodata", false)
- .Case(".bss", false)
- .Default(true);
- EmitThisSym = false;
- break;
- }
- } else {
- NonPCRelCount++;
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".rodata", false)
- .Case(".data.rel", false)
- .Case(".bss", false)
- .Default(true);
-
- switch (RelocType) {
- default: EmitThisSym = true; break;
- case ELF::R_ARM_ABS32: EmitThisSym = false; break;
- }
- }
-
- if (EmitThisSym)
- return &Symbol;
- if (! Symbol.isTemporary() && InNormalSection) {
- return &Symbol;
- }
- return NULL;
-}
-
-// Need to examine the Fixup when determining whether to
-// emit the relocation as an explicit symbol or as a section relative
-// offset
-unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- unsigned Type = GetRelocTypeInner(Target, Fixup, IsPCRel);
-
- if (RelocNeedsGOT(Modifier))
- NeedsGOT = true;
-
- return Type;
-}
-
-unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- unsigned Type = 0;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: assert(0 && "Unimplemented");
- case FK_Data_4:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_ARM_REL32;
- break;
- case MCSymbolRefExpr::VK_ARM_TLSGD:
- assert(0 && "unimplemented");
- break;
- case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
- Type = ELF::R_ARM_TLS_IE32;
- break;
- }
- break;
- case ARM::fixup_arm_uncondbranch:
- switch (Modifier) {
- case MCSymbolRefExpr::VK_ARM_PLT:
- Type = ELF::R_ARM_PLT32;
- break;
- default:
- Type = ELF::R_ARM_CALL;
- break;
- }
- break;
- case ARM::fixup_arm_condbranch:
- Type = ELF::R_ARM_JUMP24;
- break;
- case ARM::fixup_arm_movt_hi16:
- case ARM::fixup_arm_movt_hi16_pcrel:
- Type = ELF::R_ARM_MOVT_PREL;
- break;
- case ARM::fixup_arm_movw_lo16:
- case ARM::fixup_arm_movw_lo16_pcrel:
- Type = ELF::R_ARM_MOVW_PREL_NC;
- break;
- case ARM::fixup_t2_movt_hi16:
- case ARM::fixup_t2_movt_hi16_pcrel:
- Type = ELF::R_ARM_THM_MOVT_PREL;
- break;
- case ARM::fixup_t2_movw_lo16:
- case ARM::fixup_t2_movw_lo16_pcrel:
- Type = ELF::R_ARM_THM_MOVW_PREL_NC;
- break;
- case ARM::fixup_arm_thumb_bl:
- case ARM::fixup_arm_thumb_blx:
- switch (Modifier) {
- case MCSymbolRefExpr::VK_ARM_PLT:
- Type = ELF::R_ARM_THM_CALL;
- break;
- default:
- Type = ELF::R_ARM_NONE;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_4:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier"); break;
- case MCSymbolRefExpr::VK_ARM_GOT:
- Type = ELF::R_ARM_GOT_BREL;
- break;
- case MCSymbolRefExpr::VK_ARM_TLSGD:
- Type = ELF::R_ARM_TLS_GD32;
- break;
- case MCSymbolRefExpr::VK_ARM_TPOFF:
- Type = ELF::R_ARM_TLS_LE32;
- break;
- case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
- Type = ELF::R_ARM_TLS_IE32;
- break;
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_ARM_ABS32;
- break;
- case MCSymbolRefExpr::VK_ARM_GOTOFF:
- Type = ELF::R_ARM_GOTOFF32;
- break;
- }
- break;
- case ARM::fixup_arm_ldst_pcrel_12:
- case ARM::fixup_arm_pcrel_10:
- case ARM::fixup_arm_adr_pcrel_12:
- case ARM::fixup_arm_thumb_bl:
- case ARM::fixup_arm_thumb_cb:
- case ARM::fixup_arm_thumb_cp:
- case ARM::fixup_arm_thumb_br:
- assert(0 && "Unimplemented");
- break;
- case ARM::fixup_arm_uncondbranch:
- Type = ELF::R_ARM_CALL;
- break;
- case ARM::fixup_arm_condbranch:
- Type = ELF::R_ARM_JUMP24;
- break;
- case ARM::fixup_arm_movt_hi16:
- Type = ELF::R_ARM_MOVT_ABS;
- break;
- case ARM::fixup_arm_movw_lo16:
- Type = ELF::R_ARM_MOVW_ABS_NC;
- break;
- case ARM::fixup_t2_movt_hi16:
- Type = ELF::R_ARM_THM_MOVT_ABS;
- break;
- case ARM::fixup_t2_movw_lo16:
- Type = ELF::R_ARM_THM_MOVW_ABS_NC;
- break;
- }
- }
-
- return Type;
-}
-
-//===- PPCELFObjectWriter -------------------------------------------===//
-
-PPCELFObjectWriter::PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
-}
-
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
- unsigned Type;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented");
- case PPC::fixup_ppc_br24:
- Type = ELF::R_PPC_REL24;
- break;
- case FK_PCRel_4:
- Type = ELF::R_PPC_REL32;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case PPC::fixup_ppc_br24:
- Type = ELF::R_PPC_ADDR24;
- break;
- case PPC::fixup_ppc_brcond14:
- Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_
- break;
- case PPC::fixup_ppc_ha16:
- Type = ELF::R_PPC_ADDR16_HA;
- break;
- case PPC::fixup_ppc_lo16:
- Type = ELF::R_PPC_ADDR16_LO;
- break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case FK_Data_4:
- Type = ELF::R_PPC_ADDR32;
- break;
- case FK_Data_2:
- Type = ELF::R_PPC_ADDR16;
- break;
- }
- }
- return Type;
-}
-
-void
-PPCELFObjectWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
- switch ((unsigned)Fixup.getKind()) {
- case PPC::fixup_ppc_ha16:
- case PPC::fixup_ppc_lo16:
- RelocOffset += 2;
- break;
- default:
- break;
- }
-}
-
-//===- MBlazeELFObjectWriter -------------------------------------------===//
-
-MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
-}
-
-MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
-}
-
-unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
- unsigned Type;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented");
- case FK_PCRel_4:
- Type = ELF::R_MICROBLAZE_64_PCREL;
- break;
- case FK_PCRel_2:
- Type = ELF::R_MICROBLAZE_32_PCREL;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_4:
- Type = ((IsRelocWithSymbol || Addend !=0)
- ? ELF::R_MICROBLAZE_32
- : ELF::R_MICROBLAZE_64);
- break;
- case FK_Data_2:
- Type = ELF::R_MICROBLAZE_32;
- break;
- }
- }
- return Type;
-}
-
-//===- X86ELFObjectWriter -------------------------------------------===//
-
-
-X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
-{}
-
-X86ELFObjectWriter::~X86ELFObjectWriter()
-{}
-
-unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
-
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
- unsigned Type;
- if (is64Bit()) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
- case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
- case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
-
- case FK_PCRel_8:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC64;
- break;
- case X86::reloc_signed_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_riprel_4byte:
- case FK_PCRel_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_X86_64_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_X86_64_GOTTPOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_X86_64_TLSGD;
- break;
- case MCSymbolRefExpr::VK_TLSLD:
- Type = ELF::R_X86_64_TLSLD;
- break;
- }
- break;
- case FK_PCRel_2:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC16;
- break;
- case FK_PCRel_1:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC8;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_8: Type = ELF::R_X86_64_64; break;
- case X86::reloc_signed_4byte:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_32S;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF32;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF32;
- break;
- }
- break;
- case FK_Data_4:
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
- }
- } else {
- if (IsPCRel) {
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
-
- // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
- // instead?
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_386_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_386_GOTOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_386_TLS_GD;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_386_TLS_LE_32;
- break;
- case MCSymbolRefExpr::VK_INDNTPOFF:
- Type = ELF::R_386_TLS_IE;
- break;
- case MCSymbolRefExpr::VK_NTPOFF:
- Type = ELF::R_386_TLS_LE;
- break;
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- Type = ELF::R_386_TLS_GOTIE;
- break;
- case MCSymbolRefExpr::VK_TLSLDM:
- Type = ELF::R_386_TLS_LDM;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_386_TLS_LDO_32;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_386_TLS_IE_32;
- break;
- }
- break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
- }
- }
-
- if (RelocNeedsGOT(Modifier))
- NeedsGOT = true;
-
- return Type;
-}
-
-MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {}
-
-MipsELFObjectWriter::~MipsELFObjectWriter() {}
-
-unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // tbd
- return 1;
+ return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
deleted file mode 100644
index 862b085c76bf..000000000000
--- a/lib/MC/ELFObjectWriter.h
+++ /dev/null
@@ -1,446 +0,0 @@
-//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF object file writer information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_ELFOBJECTWRITER_H
-#define LLVM_MC_ELFOBJECTWRITER_H
-
-#include "MCELF.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-
-#include <vector>
-
-namespace llvm {
-
-class MCSection;
-class MCDataFragment;
-class MCSectionELF;
-
-class ELFObjectWriter : public MCObjectWriter {
- protected:
-
- static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
- static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
- static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
- static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
- bool Used, bool Renamed);
- static bool isLocal(const MCSymbolData &Data, bool isSignature,
- bool isUsedInReloc);
- static bool IsELFMetaDataSection(const MCSectionData &SD);
- static uint64_t DataSectionSize(const MCSectionData &SD);
- static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
- static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
-
- void WriteDataSectionData(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSectionELF &Section);
-
- /*static bool isFixupKindX86RIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
- }*/
-
- /// ELFSymbolData - Helper struct for containing some precomputed
- /// information on symbols.
- struct ELFSymbolData {
- MCSymbolData *SymbolData;
- uint64_t StringIndex;
- uint32_t SectionIndex;
-
- // Support lexicographic sorting.
- bool operator<(const ELFSymbolData &RHS) const {
- if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
- return true;
- if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
- return false;
- return SymbolData->getSymbol().getName() <
- RHS.SymbolData->getSymbol().getName();
- }
- };
-
- /// @name Relocation Data
- /// @{
-
- struct ELFRelocationEntry {
- // Make these big enough for both 32-bit and 64-bit
- uint64_t r_offset;
- int Index;
- unsigned Type;
- const MCSymbol *Symbol;
- uint64_t r_addend;
-
- ELFRelocationEntry()
- : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
-
- ELFRelocationEntry(uint64_t RelocOffset, int Idx,
- unsigned RelType, const MCSymbol *Sym,
- uint64_t Addend)
- : r_offset(RelocOffset), Index(Idx), Type(RelType),
- Symbol(Sym), r_addend(Addend) {}
-
- // Support lexicographic sorting.
- bool operator<(const ELFRelocationEntry &RE) const {
- return RE.r_offset < r_offset;
- }
- };
-
- /// The target specific ELF writer instance.
- llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
-
- SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
- SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
- DenseMap<const MCSymbol *, const MCSymbol *> Renames;
-
- llvm::DenseMap<const MCSectionData*,
- std::vector<ELFRelocationEntry> > Relocations;
- DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
-
- /// @}
- /// @name Symbol Table Data
- /// @{
-
- SmallString<256> StringTable;
- std::vector<ELFSymbolData> LocalSymbolData;
- std::vector<ELFSymbolData> ExternalSymbolData;
- std::vector<ELFSymbolData> UndefinedSymbolData;
-
- /// @}
-
- bool NeedsGOT;
-
- bool NeedsSymtabShndx;
-
- // This holds the symbol table index of the last local symbol.
- unsigned LastLocalSymbolIndex;
- // This holds the .strtab section index.
- unsigned StringTableIndex;
- // This holds the .symtab section index.
- unsigned SymbolTableIndex;
-
- unsigned ShstrtabIndex;
-
-
- virtual const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
-
- // For arch-specific emission of explicit reloc symbol
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return NULL;
- }
-
- bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
- bool hasRelocationAddend() const {
- return TargetObjectWriter->hasRelocationAddend();
- }
-
- public:
- ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS, bool IsLittleEndian)
- : MCObjectWriter(_OS, IsLittleEndian),
- TargetObjectWriter(MOTW),
- NeedsGOT(false), NeedsSymtabShndx(false){
- }
-
- virtual ~ELFObjectWriter();
-
- void WriteWord(uint64_t W) {
- if (is64Bit())
- Write64(W);
- else
- Write32(W);
- }
-
- void StringLE16(char *buf, uint16_t Value) {
- buf[0] = char(Value >> 0);
- buf[1] = char(Value >> 8);
- }
-
- void StringLE32(char *buf, uint32_t Value) {
- StringLE16(buf, uint16_t(Value >> 0));
- StringLE16(buf + 2, uint16_t(Value >> 16));
- }
-
- void StringLE64(char *buf, uint64_t Value) {
- StringLE32(buf, uint32_t(Value >> 0));
- StringLE32(buf + 4, uint32_t(Value >> 32));
- }
-
- void StringBE16(char *buf ,uint16_t Value) {
- buf[0] = char(Value >> 8);
- buf[1] = char(Value >> 0);
- }
-
- void StringBE32(char *buf, uint32_t Value) {
- StringBE16(buf, uint16_t(Value >> 16));
- StringBE16(buf + 2, uint16_t(Value >> 0));
- }
-
- void StringBE64(char *buf, uint64_t Value) {
- StringBE32(buf, uint32_t(Value >> 32));
- StringBE32(buf + 4, uint32_t(Value >> 0));
- }
-
- void String8(MCDataFragment &F, uint8_t Value) {
- char buf[1];
- buf[0] = Value;
- F.getContents() += StringRef(buf, 1);
- }
-
- void String16(MCDataFragment &F, uint16_t Value) {
- char buf[2];
- if (isLittleEndian())
- StringLE16(buf, Value);
- else
- StringBE16(buf, Value);
- F.getContents() += StringRef(buf, 2);
- }
-
- void String32(MCDataFragment &F, uint32_t Value) {
- char buf[4];
- if (isLittleEndian())
- StringLE32(buf, Value);
- else
- StringBE32(buf, Value);
- F.getContents() += StringRef(buf, 4);
- }
-
- void String64(MCDataFragment &F, uint64_t Value) {
- char buf[8];
- if (isLittleEndian())
- StringLE64(buf, Value);
- else
- StringBE64(buf, Value);
- F.getContents() += StringRef(buf, 8);
- }
-
- virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
-
- /// Default e_flags = 0
- virtual void WriteEFlags() { Write32(0); }
-
- virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- uint64_t name, uint8_t info,
- uint64_t value, uint64_t size,
- uint8_t other, uint32_t shndx,
- bool Reserved);
-
- virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- ELFSymbolData &MSD,
- const MCAsmLayout &Layout);
-
- typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
- virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap);
-
- virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
-
- virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
- const MCSymbol *S);
-
- // Map from a group section to the signature symbol
- typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
- // Map from a signature symbol to the group section
- typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
- // Map from a section to the section with the relocations
- typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
- // Map from a section to its offset
- typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
-
- /// ComputeSymbolTable - Compute the symbol table data
- ///
- /// \param StringTable [out] - The string table data.
- /// \param StringIndexMap [out] - Map from symbol names to offsets in the
- /// string table.
- virtual void ComputeSymbolTable(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- RevGroupMapTy RevGroupMap,
- unsigned NumRegularSections);
-
- virtual void ComputeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
- RelMapTy &RelMap);
-
- void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap);
-
- virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- // Create the sections that show up in the symbol table. Currently
- // those are the .note.GNU-stack section and the group sections.
- virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout);
-
- void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
- const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const SectionOffsetMapTy &SectionOffsetMap);
-
- void ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections);
-
- virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
- uint64_t Address, uint64_t Offset,
- uint64_t Size, uint32_t Link, uint32_t Info,
- uint64_t Alignment, uint64_t EntrySize);
-
- virtual void WriteRelocationsFragment(const MCAssembler &Asm,
- MCDataFragment *F,
- const MCSectionData *SD);
-
- virtual bool
- IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const;
-
- virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
- virtual void WriteSection(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- uint32_t GroupSymbolIndex,
- uint64_t Offset, uint64_t Size, uint64_t Alignment,
- const MCSectionELF &Section);
-
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend) = 0;
- virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { }
- };
-
- //===- X86ELFObjectWriter -------------------------------------------===//
-
- class X86ELFObjectWriter : public ELFObjectWriter {
- public:
- X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~X86ELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-
-
- //===- ARMELFObjectWriter -------------------------------------------===//
-
- class ARMELFObjectWriter : public ELFObjectWriter {
- public:
- // FIXME: MCAssembler can't yet return the Subtarget,
- enum { DefaultEABIVersion = 0x05000000U };
-
- ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~ARMELFObjectWriter();
-
- virtual void WriteEFlags();
- protected:
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
-
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- private:
- unsigned GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const;
-
- };
-
- //===- PPCELFObjectWriter -------------------------------------------===//
-
- class PPCELFObjectWriter : public ELFObjectWriter {
- public:
- PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~PPCELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
- };
-
- //===- MBlazeELFObjectWriter -------------------------------------------===//
-
- class MBlazeELFObjectWriter : public ELFObjectWriter {
- public:
- MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~MBlazeELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-
- //===- MipsELFObjectWriter -------------------------------------------===//
-
- class MipsELFObjectWriter : public ELFObjectWriter {
- public:
- MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~MipsELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-}
-
-#endif
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
new file mode 100644
index 000000000000..f35dbe4d5d35
--- /dev/null
+++ b/lib/MC/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/MC/LLVMBuild.txt -----------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCDisassembler MCParser
+
+[component_0]
+type = Library
+name = MC
+parent = Libraries
+required_libraries = Object Support
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 2c150f456cf6..0b2e4ae7ed07 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
using namespace llvm;
MCAsmBackend::MCAsmBackend()
@@ -21,14 +22,22 @@ MCAsmBackend::~MCAsmBackend() {
const MCFixupKindInfo &
MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
- { "FK_Data_1", 0, 8, 0 },
- { "FK_Data_2", 0, 16, 0 },
- { "FK_Data_4", 0, 32, 0 },
- { "FK_Data_8", 0, 64, 0 },
- { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_GPRel_1", 0, 8, 0 },
+ { "FK_GPRel_2", 0, 16, 0 },
+ { "FK_GPRel_4", 0, 32, 0 },
+ { "FK_GPRel_8", 0, 64, 0 },
+ { "FK_SecRel_1", 0, 8, 0 },
+ { "FK_SecRel_2", 0, 16, 0 },
+ { "FK_SecRel_4", 0, 32, 0 },
+ { "FK_SecRel_8", 0, 64, 0 }
};
assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) &&
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 95861bc61c27..8286c1dfeae1 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -29,7 +29,6 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
- StructorOutputOrder = Structors::ReversePriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
@@ -50,6 +49,7 @@ MCAsmInfo::MCAsmInfo() {
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = true;
+ AllowUTF8 = true;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
@@ -68,6 +68,7 @@ MCAsmInfo::MCAsmInfo() {
AlignDirective = "\t.align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
+ GPRel64Directive = 0;
GPRel32Directive = 0;
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
@@ -91,6 +92,7 @@ MCAsmInfo::MCAsmInfo() {
DwarfRequiresRelocationForSectionOffset = true;
DwarfSectionOffsetDirective = 0;
DwarfUsesLabelOffsetForRanges = true;
+ DwarfUsesRelocationsForStringPool = true;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 434d9103a71a..881d99217bda 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -13,9 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
+void MCAsmInfoCOFF::anchor() { }
+
MCAsmInfoCOFF::MCAsmInfoCOFF() {
GlobalPrefix = "_";
COMMDirectiveAlignmentIsInBytes = false;
@@ -38,3 +39,15 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
SupportsDataRegions = false;
}
+
+void MCAsmInfoMicrosoft::anchor() { }
+
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
+ AllowQuotesInName = true;
+}
+
+void MCAsmInfoGNUCOFF::anchor() { }
+
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+
+}
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index b20e338f7904..c1e26350dc8e 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -18,6 +18,8 @@
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
+void MCAsmInfoDarwin::anchor() { }
+
MCAsmInfoDarwin::MCAsmInfoDarwin() {
// Common settings for all Darwin targets.
// Syntax:
@@ -39,7 +41,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
- StructorOutputOrder = Structors::PriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = true;
CodeBegin = "L$start$code$";
@@ -57,8 +58,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HiddenVisibilityAttr = MCSA_PrivateExtern;
HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+
// Doesn't support protected visibility.
- ProtectedVisibilityAttr = MCSA_Global;
+ ProtectedVisibilityAttr = MCSA_Invalid;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
@@ -66,4 +68,5 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
DwarfRequiresRelocationForSectionOffset = false;
DwarfUsesLabelOffsetForRanges = false;
+ DwarfUsesRelocationsForStringPool = false;
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 3fcbb05907bc..11f0f7296337 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PathV2.h"
#include <cctype>
using namespace llvm;
@@ -50,6 +51,7 @@ private:
unsigned ShowInst : 1;
unsigned UseLoc : 1;
unsigned UseCFI : 1;
+ unsigned UseDwarfDirectory : 1;
enum EHSymbolFlags { EHGlobal = 1,
EHWeakDefinition = 1 << 1,
@@ -59,17 +61,21 @@ private:
bool needsSet(const MCExpr *Value);
void EmitRegisterName(int64_t Register);
+ virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
public:
MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend,
bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) {
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
+ UseDwarfDirectory(useDwarfDirectory) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
@@ -150,6 +156,7 @@ public:
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
virtual void EmitCOFFSymbolType(int Type);
virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -179,6 +186,8 @@ public:
virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel64Value(const MCExpr *Value);
+
virtual void EmitGPRel32Value(const MCExpr *Value);
@@ -192,19 +201,18 @@ public:
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
StringRef FileName);
virtual void EmitCFISections(bool EH, bool Debug);
- virtual void EmitCFIStartProc();
- virtual void EmitCFIEndProc();
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
virtual void EmitCFIDefCfaRegister(int64_t Register);
@@ -216,6 +224,7 @@ public:
virtual void EmitCFISameValue(int64_t Register);
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+ virtual void EmitCFISignalFrame();
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -249,7 +258,7 @@ public:
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
};
@@ -334,7 +343,6 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
- default: assert(0 && "Invalid flag!");
case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
case MCAF_Code16: OS << '\t'<< MAI.getCode16Directive(); break;
@@ -386,7 +394,7 @@ void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
switch (Attribute) {
- case MCSA_Invalid: assert(0 && "Invalid symbol attribute");
+ case MCSA_Invalid: llvm_unreachable("Invalid symbol attribute");
case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function
case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC
case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object
@@ -398,7 +406,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
OS << "\t.type\t" << *Symbol << ','
<< ((MAI.getCommentString()[0] != '@') ? '@' : '%');
switch (Attribute) {
- default: assert(0 && "Unknown ELF .type");
+ default: llvm_unreachable("Unknown ELF .type");
case MCSA_ELF_TypeFunction: OS << "function"; break;
case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
case MCSA_ELF_TypeObject: OS << "object"; break;
@@ -465,6 +473,11 @@ void MCAsmStreamer::EndCOFFSymbolDef() {
EmitEOL();
}
+void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ OS << "\t.secrel32\t" << *Symbol << '\n';
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI.hasDotTypeDotSizeDirective());
OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
@@ -652,6 +665,12 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
EmitEOL();
}
+void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ assert(MAI.getGPRel64Directive() != 0);
+ OS << MAI.getGPRel64Directive() << *Value;
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
assert(MAI.getGPRel32Directive() != 0);
OS << MAI.getGPRel32Directive() << *Value;
@@ -733,11 +752,12 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
1, MaxBytesToEmit);
}
-void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
OS << ".org " << *Offset << ", " << (unsigned) Value;
EmitEOL();
+ return false;
}
@@ -748,13 +768,27 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
EmitEOL();
}
-bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
+ if (!UseDwarfDirectory && !Directory.empty()) {
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename);
+
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ }
+
if (UseLoc) {
OS << "\t.file\t" << FileNo << ' ';
+ if (!Directory.empty()) {
+ PrintQuotedString(Directory, OS);
+ OS << ' ';
+ }
PrintQuotedString(Filename, OS);
EmitEOL();
}
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
}
void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -816,21 +850,25 @@ void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
EmitEOL();
}
-void MCAsmStreamer::EmitCFIStartProc() {
- MCStreamer::EmitCFIStartProc();
-
- if (!UseCFI)
+void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcStart(Frame);
return;
+ }
OS << "\t.cfi_startproc";
EmitEOL();
}
-void MCAsmStreamer::EmitCFIEndProc() {
- MCStreamer::EmitCFIEndProc();
-
- if (!UseCFI)
+void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcEnd(Frame);
return;
+ }
+
+ // Put a dummy non-null value in Frame.End to mark that this frame has been
+ // closed.
+ Frame.End = (MCSymbol *) 1;
OS << "\t.cfi_endproc";
EmitEOL();
@@ -965,6 +1003,16 @@ void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
EmitEOL();
}
+void MCAsmStreamer::EmitCFISignalFrame() {
+ MCStreamer::EmitCFISignalFrame();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_signal_frame";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
MCStreamer::EmitWin64EHStartProc(Symbol);
@@ -1260,10 +1308,16 @@ void MCAsmStreamer::EmitRawText(StringRef String) {
EmitEOL();
}
-void MCAsmStreamer::Finish() {
+void MCAsmStreamer::FinishImpl() {
+ // FIXME: This header is duplicated with MCObjectStreamer
// Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
if (getContext().hasDwarfFiles() && !UseLoc)
- MCDwarfFileTable::Emit(this);
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
if (!UseCFI)
EmitFrames(false);
@@ -1271,9 +1325,9 @@ void MCAsmStreamer::Finish() {
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
- bool useCFI, MCInstPrinter *IP,
- MCCodeEmitter *CE, MCAsmBackend *MAB,
- bool ShowInst) {
+ bool useCFI, bool useDwarfDirectory,
+ MCInstPrinter *IP, MCCodeEmitter *CE,
+ MCAsmBackend *MAB, bool ShowInst) {
return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
- IP, CE, MAB, ShowInst);
+ useDwarfDirectory, IP, CE, MAB, ShowInst);
}
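
For reference, the new useDwarfDirectory flag changes only how EmitDwarfFileDirective above prints the .file directive. A rough sketch of the behavior, where the streamer pointer and the paths are hypothetical, not taken from this patch:

// Illustrative only.
Streamer->EmitDwarfFileDirective(1, "/src/proj", "foo.s");
//   useDwarfDirectory == true   ->  .file 1 "/src/proj" "foo.s"
//   useDwarfDirectory == false  ->  .file 1 "/src/proj/foo.s"
// An absolute file name keeps the directory from being prepended:
Streamer->EmitDwarfFileDirective(2, "/src/proj", "/abs/bar.s");
//   useDwarfDirectory == false  ->  .file 2 "/abs/bar.s"
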
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 06c8aec91917..66ba9b81f3aa 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -13,13 +13,13 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -33,7 +33,7 @@ using namespace llvm;
namespace {
namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
-STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
@@ -118,7 +118,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
report_fatal_error("unable to evaluate offset to undefined symbol '" +
Target.getSymB()->getSymbol().getName() + "'");
-
+
uint64_t Offset = Target.getConstant();
if (Target.getSymA())
Offset += getSymbolOffset(&Assembler.getSymbolData(
@@ -136,7 +136,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
// The size is the last fragment's end offset.
const MCFragment &F = SD->getFragmentList().back();
- return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
@@ -237,13 +237,13 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
-bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
- ++stats::EvaluateFixup;
+ ++stats::evaluateFixup;
if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
- report_fatal_error("expected relocatable expression");
+ getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
@@ -273,13 +273,10 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
Value = Target.getConstant();
- bool IsThumb = false;
if (const MCSymbolRefExpr *A = Target.getSymA()) {
const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
if (Sym.isDefined())
Value += Layout.getSymbolOffset(&getSymbolData(Sym));
- if (isThumbFunc(&Sym))
- IsThumb = true;
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
@@ -295,24 +292,22 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
if (IsPCRel) {
uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
-
+
// A number of ARM fixups in Thumb mode require that the effective PC
// address be determined as the 32-bit aligned version of the actual offset.
if (ShouldAlignPC) Offset &= ~0x3;
Value -= Offset;
}
- // ARM fixups based from a thumb function address need to have the low
- // bit set. The actual value is always at least 16-bit aligned, so the
- // low bit is normally clear and available for use as an ISA flag for
- // interworking.
- if (IsThumb)
- Value |= 1;
+ // Let the backend adjust the fixup value if necessary, including whether
+ // we need a relocation.
+ Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value,
+ IsResolved);
return IsResolved;
}
-uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
@@ -355,8 +350,7 @@ uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
return cast<MCDwarfCallFrameFragment>(F).getContents().size();
}
- assert(0 && "invalid fragment kind");
- return 0;
+ llvm_unreachable("invalid fragment kind");
}
void MCAsmLayout::LayoutFragment(MCFragment *F) {
@@ -374,7 +368,7 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
// Compute fragment offset and size.
uint64_t Offset = 0;
if (Prev)
- Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+ Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
F->Offset = Offset;
LastValidFragment[F->getParent()] = F;
@@ -390,7 +384,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -412,7 +406,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// bytes left to fill use the Value and ValueSize to fill the rest.
// If we are aligning with nops, ask that target to emit the right data.
if (AF.hasEmitNops()) {
- if (!Asm.getBackend().WriteNopData(Count, OW))
+ if (!Asm.getBackend().writeNopData(Count, OW))
report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
break;
@@ -421,8 +415,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// Otherwise, write out in multiples of the value size.
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
- default:
- assert(0 && "Invalid size!");
+ default: llvm_unreachable("Invalid size!");
case 1: OW->Write8 (uint8_t (AF.getValue())); break;
case 2: OW->Write16(uint16_t(AF.getValue())); break;
case 4: OW->Write32(uint32_t(AF.getValue())); break;
@@ -446,8 +439,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
switch (FF.getValueSize()) {
- default:
- assert(0 && "Invalid size!");
+ default: llvm_unreachable("Invalid size!");
case 1: OW->Write8 (uint8_t (FF.getValue())); break;
case 2: OW->Write16(uint16_t(FF.getValue())); break;
case 4: OW->Write32(uint32_t(FF.getValue())); break;
@@ -493,7 +485,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
assert(OW->getStream().tell() - Start == FragmentSize);
}
-void MCAssembler::WriteSectionData(const MCSectionData *SD,
+void MCAssembler::writeSectionData(const MCSectionData *SD,
const MCAsmLayout &Layout) const {
// Ignore virtual sections.
if (SD->getSection().isVirtualSection()) {
@@ -503,8 +495,7 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
for (MCSectionData::const_iterator it = SD->begin(),
ie = SD->end(); it != ie; ++it) {
switch (it->getKind()) {
- default:
- assert(0 && "Invalid fragment in virtual section!");
+ default: llvm_unreachable("Invalid fragment in virtual section!");
case MCFragment::FT_Data: {
// Check that we aren't trying to write a non-zero contents (or fixups)
// into a virtual section. This is to support clients which use standard
@@ -546,13 +537,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
}
-uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout,
MCFragment &F,
const MCFixup &Fixup) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
- if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
@@ -592,7 +583,7 @@ void MCAssembler::Finish() {
}
// Layout until everything fits.
- while (LayoutOnce(Layout))
+ while (layoutOnce(Layout))
continue;
DEBUG_WITH_TYPE("mc-dump", {
@@ -600,7 +591,7 @@ void MCAssembler::Finish() {
dump(); });
// Finalize the layout, including fragment lowering.
- FinishLayout(Layout);
+ finishLayout(Layout);
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - final-layout\n--\n";
@@ -621,8 +612,8 @@ void MCAssembler::Finish() {
for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
- getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+ uint64_t FixedValue = handleFixup(Layout, *DF, Fixup);
+ getBackend().applyFixup(Fixup, DF->getContents().data(),
DF->getContents().size(), FixedValue);
}
}
@@ -631,8 +622,8 @@ void MCAssembler::Finish() {
for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
- getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+ uint64_t FixedValue = handleFixup(Layout, *IF, Fixup);
+ getBackend().applyFixup(Fixup, IF->getCode().data(),
IF->getCode().size(), FixedValue);
}
}
@@ -645,8 +636,8 @@ void MCAssembler::Finish() {
stats::ObjectBytes += OS.tell() - StartOffset;
}
-bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
- const MCFragment *DF,
+bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
if (getRelaxAll())
return true;
@@ -654,34 +645,31 @@ bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
- if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
return true;
- // Otherwise, relax if the value is too big for a (signed) i8.
- //
- // FIXME: This is target dependent!
- return int64_t(Value) != int64_t(int8_t(Value));
+ return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const {
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
// previous instruction to one that doesn't need relaxation.
- if (!getBackend().MayNeedRelaxation(IF->getInst()))
+ if (!getBackend().mayNeedRelaxation(IF->getInst()))
return false;
for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
ie = IF->fixup_end(); it != ie; ++it)
- if (FixupNeedsRelaxation(*it, IF, Layout))
+ if (fixupNeedsRelaxation(*it, IF, Layout))
return true;
return false;
}
-bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
MCInstFragment &IF) {
- if (!FragmentNeedsRelaxation(&IF, Layout))
+ if (!fragmentNeedsRelaxation(&IF, Layout))
return false;
++stats::RelaxedInstructions;
@@ -692,7 +680,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
// Relax the fragment.
MCInst Relaxed;
- getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+ getBackend().relaxInstruction(IF.getInst(), Relaxed);
// Encode the new instruction.
//
@@ -715,7 +703,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
return true;
}
-bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
int64_t Value = 0;
uint64_t OldSize = LF.getContents().size();
bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
@@ -732,8 +720,8 @@ bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
-bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
- MCDwarfLineAddrFragment &DF) {
+bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -749,7 +737,7 @@ bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
@@ -764,7 +752,7 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout,
MCSectionData &SD) {
MCFragment *FirstInvalidFragment = NULL;
// Scan for fragments that need relaxation.
@@ -776,19 +764,19 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
default:
break;
case MCFragment::FT_Inst:
- relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2));
break;
case MCFragment::FT_Dwarf:
- relaxedFrag = RelaxDwarfLineAddr(Layout,
+ relaxedFrag = relaxDwarfLineAddr(Layout,
*cast<MCDwarfLineAddrFragment>(it2));
break;
case MCFragment::FT_DwarfFrame:
relaxedFrag =
- RelaxDwarfCallFrameFragment(Layout,
+ relaxDwarfCallFrameFragment(Layout,
*cast<MCDwarfCallFrameFragment>(it2));
break;
case MCFragment::FT_LEB:
- relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2));
break;
}
// Update the layout, and remember that we relaxed.
@@ -802,20 +790,20 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
return false;
}
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
++stats::RelaxationSteps;
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
- while(LayoutSectionOnce(Layout, SD))
+ while(layoutSectionOnce(Layout, SD))
WasRelaxed = true;
}
return WasRelaxed;
}
-void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
@@ -975,3 +963,13 @@ void MCAssembler::dump() {
}
OS << "]>\n";
}
+
+// anchors for MC*Fragment vtables
+void MCDataFragment::anchor() { }
+void MCInstFragment::anchor() { }
+void MCAlignFragment::anchor() { }
+void MCFillFragment::anchor() { }
+void MCOrgFragment::anchor() { }
+void MCLEBFragment::anchor() { }
+void MCDwarfLineAddrFragment::anchor() { }
+void MCDwarfCallFrameFragment::anchor() { }
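
With the generic signed-i8 check removed from fixupNeedsRelaxation above, each MCAsmBackend now decides when a fixup forces relaxation. A minimal sketch of such an override, using a hypothetical MyAsmBackend that merely restates the old target-independent rule:

// Hypothetical backend hook; mirrors the check deleted from MCAssembler.
bool MyAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                                        const MCInstFragment *DF,
                                        const MCAsmLayout &Layout) const {
  // Relax when the resolved value no longer fits a signed 8-bit field.
  return int64_t(Value) != int64_t(int8_t(Value));
}
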
diff --git a/lib/MC/MCCodeGenInfo.cpp b/lib/MC/MCCodeGenInfo.cpp
index 236e7de68a8a..d9dcfd0614bc 100644
--- a/lib/MC/MCCodeGenInfo.cpp
+++ b/lib/MC/MCCodeGenInfo.cpp
@@ -15,7 +15,9 @@
#include "llvm/MC/MCCodeGenInfo.h"
using namespace llvm;
-void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM) {
+void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
RelocationModel = RM;
CMModel = CM;
+ OptLevel = OL;
}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 82690ee3b3e9..d3c4fb1d7ca5 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -20,6 +20,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Signals.h"
using namespace llvm;
typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -28,8 +31,8 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
- const MCObjectFileInfo *mofi) :
- MAI(mai), MRI(mri), MOFI(mofi),
+ const MCObjectFileInfo *mofi, const SourceMgr *mgr) :
+ SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
Allocator(), Symbols(Allocator), UsedNames(Allocator),
NextUniqueID(0),
CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
@@ -43,6 +46,8 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
SecureLogUsed = false;
DwarfLocSeen = false;
+ GenDwarfForAssembly = false;
+ GenDwarfFileNumber = 0;
}
MCContext::~MCContext() {
@@ -248,7 +253,8 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
/// directory tables. If the file number has already been allocated it is an
/// error and zero is returned and the client reports the error, else the
/// allocated file number is returned. The file numbers may be in any order.
-unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
+ unsigned FileNumber) {
// TODO: a FileNumber of zero says to use the next available file number.
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be changed to be not less than zero.
@@ -266,19 +272,23 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
// Get the new MCDwarfFile slot for this FileNumber.
MCDwarfFile *&File = MCDwarfFiles[FileNumber];
- // Separate the directory part from the basename of the FileName.
- std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+ if (Directory.empty()) {
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+ Directory = Slash.second;
+ if (!Directory.empty()) {
+ Directory = Slash.first;
+ FileName = Slash.second;
+ }
+ }
// Find or make a entry in the MCDwarfDirs vector for this Directory.
- StringRef Name;
- unsigned DirIndex;
// Capture directory name.
- if (Slash.second.empty()) {
- Name = Slash.first;
- DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ unsigned DirIndex;
+ if (Directory.empty()) {
+ // For FileNames with no directories a DirIndex of 0 is used.
+ DirIndex = 0;
} else {
- StringRef Directory = Slash.first;
- Name = Slash.second;
DirIndex = 0;
for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
if (Directory == MCDwarfDirs[DirIndex])
@@ -291,16 +301,16 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
}
// The DirIndex is one based, as DirIndex of 0 is used for FileNames with
// no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
- // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames are
- // stored at MCDwarfFiles[FileNumber].Name .
+ // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames
+ // are stored at MCDwarfFiles[FileNumber].Name .
DirIndex++;
}
// Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
// vector.
- char *Buf = static_cast<char *>(Allocate(Name.size()));
- memcpy(Buf, Name.data(), Name.size());
- File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+ char *Buf = static_cast<char *>(Allocate(FileName.size()));
+ memcpy(Buf, FileName.data(), FileName.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, FileName.size()), DirIndex);
// return the allocated FileNumber.
return FileNumber;
@@ -314,3 +324,19 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
return MCDwarfFiles[FileNumber] != 0;
}
+
+void MCContext::FatalError(SMLoc Loc, const Twine &Msg) {
+ // If we have a source manager and a location, use it. Otherwise just
+ // use the generic report_fatal_error().
+ if (!SrcMgr || Loc == SMLoc())
+ report_fatal_error(Msg);
+
+ // Use the source manager to print the message.
+ SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+ exit(1);
+}
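
To make the directory handling in GetDwarfFile concrete, a few example calls and the table entries they would produce; the context variable, file names, and file numbers are hypothetical:

// Rough sketch of MCContext::GetDwarfFile bookkeeping; illustrative only.
Ctx.GetDwarfFile("", "inc/a.s", 1);  // no explicit dir: split on '/', so "inc"
                                     // lands in MCDwarfDirs and gets DirIndex 1
Ctx.GetDwarfFile("inc", "b.s", 2);   // explicit directory, reuses DirIndex 1
Ctx.GetDwarfFile("", "c.s", 3);      // no '/' in the name -> DirIndex 0
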
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 4debb288e5ab..5e2cd8387db1 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -2,29 +2,7 @@ add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
EDDisassembler.cpp
EDInst.cpp
+ EDMain.cpp
EDOperand.cpp
EDToken.cpp
)
-
-add_llvm_library_dependencies(LLVMMCDisassembler
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
-
-foreach(t ${LLVM_TARGETS_TO_BUILD})
- set(td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t})
- if(EXISTS ${td}/TargetInfo/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Info")
- endif()
- if(EXISTS ${td}/MCTargetDesc/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Desc")
- endif()
- if(EXISTS ${td}/AsmParser/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}AsmParser")
- endif()
- if(EXISTS ${td}/Disassembler/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Disassembler")
- endif()
-endforeach(t)
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 16e66dc98e74..35f675dc6d1b 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -15,10 +15,13 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class Target;
@@ -36,6 +39,12 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
// Initialize targets and assembly printers/parsers.
+ // FIXME: Clients are responsible for initializing the targets. And this
+ // would be done by calling routines in "llvm-c/Target.h" which are static
+ // inline functions. But the current use of LLVMCreateDisasm() is to
+ // dynamically load libLTO with dlopen() and then look up the symbols using
+ // dlsym(). And since these initialization routines are static inline, that
+ // does not work, which is why the call to them in this 'C' library API was
+ // added back.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
@@ -50,6 +59,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
assert(MAI && "Unable to create target asm info!");
+ const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+ assert(MII && "Unable to create target instruction info!");
+
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
assert(MRI && "Unable to create target register info!");
@@ -73,13 +85,13 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
- *MAI, *STI);
+ *MAI, *MII, *MRI, *STI);
assert(IP && "Unable to create instruction printer!");
LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
- Ctx, DisAsm, IP);
+ STI, MII, Ctx, DisAsm, IP);
assert(DC && "Allocation failure!");
return DC;
@@ -170,5 +182,5 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
return Size;
}
}
- return 0;
+ llvm_unreachable("Invalid DecodeStatus!");
}
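
The C API above needs only a triple and a byte buffer to drive, and per the FIXME, LLVMCreateDisasm initializes the targets itself. A minimal usage sketch; the triple, bytes, and buffer sizes are hypothetical:

// Illustrative only.
#include "llvm-c/Disassembler.h"
#include <stdint.h>
#include <stdio.h>

int main(void) {
  LLVMDisasmContextRef DC =
      LLVMCreateDisasm("x86_64-unknown-unknown", NULL, 0, NULL, NULL);
  if (!DC)
    return 1;
  uint8_t Bytes[] = { 0xc3 };                       // ret
  char Text[128];
  size_t Size = LLVMDisasmInstruction(DC, Bytes, sizeof(Bytes), 0,
                                      Text, sizeof(Text));
  if (Size)
    printf("%u byte(s): %s\n", (unsigned)Size, Text);
  LLVMDisasmDispose(DC);
  return 0;
}
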
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 238ff7d50025..880a31ad76b9 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -28,7 +28,9 @@ class MCContext;
class MCAsmInfo;
class MCDisassembler;
class MCInstPrinter;
+class MCInstrInfo;
class MCRegisterInfo;
+class MCSubtargetInfo;
class Target;
//
@@ -61,6 +63,10 @@ private:
llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
// The register information for the target architecture.
llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
+ // The subtarget information for the target architecture.
+ llvm::OwningPtr<const llvm::MCSubtargetInfo> MSI;
+ // The instruction information for the target architecture.
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII;
// The assembly context for creating symbols and MCExprs.
llvm::OwningPtr<const llvm::MCContext> Ctx;
// The disassembler for the target architecture.
@@ -78,6 +84,8 @@ public:
LLVMSymbolLookupCallback symbolLookUp,
const Target *theTarget, const MCAsmInfo *mAI,
const MCRegisterInfo *mRI,
+ const MCSubtargetInfo *mSI,
+ const MCInstrInfo *mII,
llvm::MCContext *ctx, const MCDisassembler *disAsm,
MCInstPrinter *iP) : TripleName(tripleName),
DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
@@ -85,6 +93,8 @@ public:
CommentStream(CommentsToEmit) {
MAI.reset(mAI);
MRI.reset(mRI);
+ MSI.reset(mSI);
+ MII.reset(mII);
Ctx.reset(ctx);
DisAsm.reset(disAsm);
IP.reset(iP);
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 83362a21f77b..b2672ca3ccba 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -34,10 +35,8 @@
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
using namespace llvm;
-bool EDDisassembler::sInitialized = false;
EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
struct TripleMap {
@@ -49,8 +48,7 @@ static struct TripleMap triplemap[] = {
{ Triple::x86, "i386-unknown-unknown" },
{ Triple::x86_64, "x86_64-unknown-unknown" },
{ Triple::arm, "arm-unknown-unknown" },
- { Triple::thumb, "thumb-unknown-unknown" },
- { Triple::InvalidArch, NULL, }
+ { Triple::thumb, "thumb-unknown-unknown" }
};
/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
@@ -77,90 +75,69 @@ static const char *tripleFromArch(Triple::ArchType arch) {
static int getLLVMSyntaxVariant(Triple::ArchType arch,
EDDisassembler::AssemblySyntax syntax) {
switch (syntax) {
- default:
- return -1;
// Mappings below from X86AsmPrinter.cpp
case EDDisassembler::kEDAssemblySyntaxX86ATT:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 0;
- else
- return -1;
+ break;
case EDDisassembler::kEDAssemblySyntaxX86Intel:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 1;
- else
- return -1;
+ break;
case EDDisassembler::kEDAssemblySyntaxARMUAL:
if (arch == Triple::arm || arch == Triple::thumb)
return 0;
- else
- return -1;
+ break;
}
-}
-void EDDisassembler::initialize() {
- if (sInitialized)
- return;
-
- sInitialized = true;
-
- InitializeAllTargetInfos();
- InitializeAllTargetMCs();
- InitializeAllAsmParsers();
- InitializeAllDisassemblers();
+ return -1;
}
-#undef BRINGUP_TARGET
-
EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
AssemblySyntax syntax) {
+ const char *triple = tripleFromArch(arch);
+ return getDisassembler(StringRef(triple), syntax);
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
CPUKey key;
- key.Arch = arch;
+ key.Triple = str.str();
key.Syntax = syntax;
-
+
EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
-
+
if (i != sDisassemblers.end()) {
- return i->second;
- } else {
- EDDisassembler* sdd = new EDDisassembler(key);
- if (!sdd->valid()) {
- delete sdd;
- return NULL;
- }
-
- sDisassemblers[key] = sdd;
-
- return sdd;
+ return i->second;
}
-
- return NULL;
-}
-EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
- AssemblySyntax syntax) {
- return getDisassembler(Triple(str).getArch(), syntax);
+ EDDisassembler *sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
}
EDDisassembler::EDDisassembler(CPUKey &key) :
Valid(false),
HasSemantics(false),
ErrorStream(nulls()),
- Key(key) {
- const char *triple = tripleFromArch(key.Arch);
-
- if (!triple)
- return;
+ Key(key),
+ TgtTriple(key.Triple.c_str()) {
- LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax);
if (LLVMSyntaxVariant < 0)
return;
- std::string tripleString(triple);
+ std::string tripleString(key.Triple);
std::string errorString;
- Tgt = TargetRegistry::lookupTarget(tripleString,
+ Tgt = TargetRegistry::lookupTarget(key.Triple,
errorString);
if (!Tgt)
@@ -189,10 +166,16 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
return;
InstInfos = Disassembler->getEDInfo();
-
+
+ MII.reset(Tgt->createMCInstrInfo());
+
+ if (!MII)
+ return;
+
InstString.reset(new std::string);
InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo,
+ *MII, *MRI, *STI));
if (!InstPrinter)
return;
@@ -279,7 +262,7 @@ void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
RegRMap[registerName] = registerIndex;
}
- switch (Key.Arch) {
+ switch (TgtTriple.getArch()) {
default:
break;
case Triple::x86:
@@ -337,13 +320,9 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) {
return 0;
}
-static void diag_handler(const SMDiagnostic &diag,
- void *context)
-{
- if (context) {
- EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
- diag.Print("", disassembler->ErrorStream);
- }
+static void diag_handler(const SMDiagnostic &diag, void *context) {
+ if (context)
+ diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
}
int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
@@ -351,7 +330,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
const std::string &str) {
int ret = 0;
- switch (Key.Arch) {
+ switch (TgtTriple.getArch()) {
default:
return -1;
case Triple::x86:
@@ -376,8 +355,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
context, *streamer,
*AsmInfo));
- StringRef triple = tripleFromArch(Key.Arch);
- OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+ OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", ""));
OwningPtr<MCTargetAsmParser>
TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 38c22038c510..6f71908d2bcf 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -25,6 +25,7 @@
#include <map>
#include <set>
+#include <string>
#include <vector>
namespace llvm {
@@ -35,8 +36,9 @@ class MCContext;
class MCAsmInfo;
class MCAsmLexer;
class MCDisassembler;
-class MCInstPrinter;
class MCInst;
+class MCInstPrinter;
+class MCInstrInfo;
class MCParsedAsmOperand;
class MCRegisterInfo;
class MCStreamer;
@@ -74,28 +76,26 @@ struct EDDisassembler {
/// pair
struct CPUKey {
/// The architecture type
- llvm::Triple::ArchType Arch;
+ std::string Triple;
/// The assembly syntax
AssemblySyntax Syntax;
/// operator== - Equality operator
bool operator==(const CPUKey &key) const {
- return (Arch == key.Arch &&
+ return (Triple == key.Triple &&
Syntax == key.Syntax);
}
/// operator< - Less-than operator
bool operator<(const CPUKey &key) const {
- return ((Arch < key.Arch) ||
- ((Arch == key.Arch) && Syntax < (key.Syntax)));
+ return ((Triple < key.Triple) ||
+ ((Triple == key.Triple) && Syntax < (key.Syntax)));
}
};
typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
- /// True if the disassembler registry has been initialized; false if not
- static bool sInitialized;
/// A map from disassembler specifications to disassemblers. Populated
/// lazily.
static DisassemblerMap_t sDisassemblers;
@@ -116,9 +116,6 @@ struct EDDisassembler {
static EDDisassembler *getDisassembler(llvm::StringRef str,
AssemblySyntax syntax);
- /// initialize - Initializes the disassembler registry and the LLVM backend
- static void initialize();
-
////////////////////////
// Per-object members //
////////////////////////
@@ -131,14 +128,18 @@ struct EDDisassembler {
/// The stream to write errors to
llvm::raw_ostream &ErrorStream;
- /// The architecture/syntax pair for the current architecture
+ /// The triple/syntax pair for the current architecture
CPUKey Key;
+ /// The Triple for the current architecture
+ Triple TgtTriple;
/// The LLVM target corresponding to the disassembler
const llvm::Target *Tgt;
/// The assembly information for the target architecture
llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
/// The subtarget information for the target architecture
llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
+ // The instruction information for the target architecture.
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII;
// The register information for the target architecture.
llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
/// The disassembler for the target architecture
diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp
new file mode 100644
index 000000000000..c658717b0249
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDMain.cpp
@@ -0,0 +1,280 @@
+//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the enhanced disassembler's public C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+#include "llvm-c/EnhancedDisassembly.h"
+using namespace llvm;
+
+int EDGetDisassembler(EDDisassemblerRef *disassembler,
+ const char *triple,
+ EDAssemblySyntax_t syntax) {
+ EDDisassembler::AssemblySyntax Syntax;
+ switch (syntax) {
+ default: llvm_unreachable("Unknown assembly syntax!");
+ case kEDAssemblySyntaxX86Intel:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
+ break;
+ case kEDAssemblySyntaxX86ATT:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
+ break;
+ case kEDAssemblySyntaxARMUAL:
+ Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
+ break;
+ }
+
+ EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
+
+ if (!ret)
+ return -1;
+ *disassembler = ret;
+ return 0;
+}
+
+int EDGetRegisterName(const char** regName,
+ EDDisassemblerRef disassembler,
+ unsigned regID) {
+ const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
+ if (!name)
+ return -1;
+ *regName = name;
+ return 0;
+}
+
+int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
+}
+
+int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
+}
+
+unsigned int EDCreateInsts(EDInstRef *insts,
+ unsigned int count,
+ EDDisassemblerRef disassembler,
+ ::EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ unsigned int index;
+
+ for (index = 0; index < count; ++index) {
+ EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
+ address, arg);
+
+ if (!inst)
+ return index;
+
+ insts[index] = inst;
+ address += inst->byteSize();
+ }
+
+ return count;
+}
+
+void EDReleaseInst(EDInstRef inst) {
+ delete ((EDInst*)inst);
+}
+
+int EDInstByteSize(EDInstRef inst) {
+ return ((EDInst*)inst)->byteSize();
+}
+
+int EDGetInstString(const char **buf,
+ EDInstRef inst) {
+ return ((EDInst*)inst)->getString(*buf);
+}
+
+int EDInstID(unsigned *instID, EDInstRef inst) {
+ *instID = ((EDInst*)inst)->instID();
+ return 0;
+}
+
+int EDInstIsBranch(EDInstRef inst) {
+ return ((EDInst*)inst)->isBranch();
+}
+
+int EDInstIsMove(EDInstRef inst) {
+ return ((EDInst*)inst)->isMove();
+}
+
+int EDBranchTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->branchTargetID();
+}
+
+int EDMoveSourceID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveSourceID();
+}
+
+int EDMoveTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveTargetID();
+}
+
+int EDNumTokens(EDInstRef inst) {
+ return ((EDInst*)inst)->numTokens();
+}
+
+int EDGetToken(EDTokenRef *token,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
+}
+
+int EDGetTokenString(const char **buf,
+ EDTokenRef token) {
+ return ((EDToken*)token)->getString(*buf);
+}
+
+int EDOperandIndexForToken(EDTokenRef token) {
+ return ((EDToken*)token)->operandID();
+}
+
+int EDTokenIsWhitespace(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
+}
+
+int EDTokenIsPunctuation(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
+}
+
+int EDTokenIsOpcode(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
+}
+
+int EDTokenIsLiteral(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
+}
+
+int EDTokenIsRegister(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenRegister;
+}
+
+int EDTokenIsNegativeLiteral(EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalSign();
+}
+
+int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalAbsoluteValue(*value);
+}
+
+int EDRegisterTokenValue(unsigned *registerID,
+ EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenRegister)
+ return -1;
+
+ return ((EDToken*)token)->registerID(*registerID);
+}
+
+int EDNumOperands(EDInstRef inst) {
+ return ((EDInst*)inst)->numOperands();
+}
+
+int EDGetOperand(EDOperandRef *operand,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
+}
+
+int EDOperandIsRegister(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isRegister();
+}
+
+int EDOperandIsImmediate(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isImmediate();
+}
+
+int EDOperandIsMemory(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isMemory();
+}
+
+int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isRegister())
+ return -1;
+ *value = ((EDOperand*)operand)->regVal();
+ return 0;
+}
+
+int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isImmediate())
+ return -1;
+ *value = ((EDOperand*)operand)->immediateVal();
+ return 0;
+}
+
+int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ ::EDRegisterReaderCallback regReader, void *arg) {
+ return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
+}
+
+#ifdef __BLOCKS__
+
+struct ByteReaderWrapper {
+ EDByteBlock_t byteBlock;
+};
+
+static int readerWrapperCallback(uint8_t *byte,
+ uint64_t address,
+ void *arg) {
+ struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg;
+ return wrapper->byteBlock(byte, address);
+}
+
+unsigned int EDBlockCreateInsts(EDInstRef *insts,
+ int count,
+ EDDisassemblerRef disassembler,
+ EDByteBlock_t byteBlock,
+ uint64_t address) {
+ struct ByteReaderWrapper wrapper;
+ wrapper.byteBlock = byteBlock;
+
+ return EDCreateInsts(insts,
+ count,
+ disassembler,
+ readerWrapperCallback,
+ address,
+ (void*)&wrapper);
+}
+
+int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ EDRegisterBlock_t regBlock) {
+ return ((EDOperand*)operand)->evaluate(*result, regBlock);
+}
+
+int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
+ return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
+}
+
+#else
+
+extern "C" unsigned int EDBlockCreateInsts() {
+ return 0;
+}
+
+extern "C" int EDBlockEvaluateOperand() {
+ return -1;
+}
+
+extern "C" int EDBlockVisitTokens() {
+ return -1;
+}
+
+#endif
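
A small driver for the entry points defined in this file; the triple, byte buffer, and reader callback below are hypothetical:

// Illustrative only: decode a single instruction through the ED C API.
#include "llvm-c/EnhancedDisassembly.h"
#include <stdint.h>

static int readByte(uint8_t *Byte, uint64_t Address, void *Arg) {
  *Byte = ((const uint8_t *)Arg)[Address];   // no bounds checking in this sketch
  return 0;
}

int disassembleOne(void) {
  EDDisassemblerRef DC;
  if (EDGetDisassembler(&DC, "x86_64-unknown-unknown",
                        kEDAssemblySyntaxX86ATT) != 0)
    return -1;
  uint8_t Code[] = { 0xc3 };                 // ret
  EDInstRef Inst;
  if (EDCreateInsts(&Inst, 1, DC, readByte, 0, Code) != 1)
    return -1;
  const char *Str;
  EDGetInstString(&Str, Inst);               // e.g. "ret" (string owned by Inst)
  EDReleaseInst(Inst);
  return 0;
}
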
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 6a4e56ff72c4..48b374659d5e 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -30,8 +30,10 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
MCOpIndex(mcOpIndex) {
unsigned int numMCOperands = 0;
- if (Disassembler.Key.Arch == Triple::x86 ||
- Disassembler.Key.Arch == Triple::x86_64) {
+ Triple::ArchType arch = Disassembler.TgtTriple.getArch();
+
+ if (arch == Triple::x86 ||
+ arch == Triple::x86_64) {
uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
switch (operandType) {
@@ -54,8 +56,8 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
break;
}
}
- else if (Disassembler.Key.Arch == Triple::arm ||
- Disassembler.Key.Arch == Triple::thumb) {
+ else if (arch == Triple::arm ||
+ arch == Triple::thumb) {
uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
switch (operandType) {
@@ -126,7 +128,9 @@ int EDOperand::evaluate(uint64_t &result,
void *arg) {
uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
- switch (Disassembler.Key.Arch) {
+ Triple::ArchType arch = Disassembler.TgtTriple.getArch();
+
+ switch (arch) {
default:
return -1;
case Triple::x86:
@@ -168,7 +172,7 @@ int EDOperand::evaluate(uint64_t &result,
unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
- if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) {
+ if (segmentReg != 0 && arch == Triple::x86_64) {
unsigned fsID = Disassembler.registerIDWithName("FS");
unsigned gsID = Disassembler.registerIDWithName("GS");
@@ -200,7 +204,6 @@ int EDOperand::evaluate(uint64_t &result,
return 0;
}
} // switch (operandType)
- break;
case Triple::arm:
case Triple::thumb:
switch (operandType) {
@@ -236,10 +239,7 @@ int EDOperand::evaluate(uint64_t &result,
return 0;
}
}
- break;
}
-
- return -1;
}
int EDOperand::isRegister() {
diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..d73c6adcbb47
--- /dev/null
+++ b/lib/MC/MCDisassembler/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/MC/MCDisassembler/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCDisassembler
+parent = MC
+required_libraries = MC MCParser Support
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 4658a3093fab..84a34f1d8735 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,10 +19,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -207,7 +209,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
//
// This emits the Dwarf file and the line tables.
//
-void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
// Switch to the section where the table will be emitted into.
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
@@ -320,6 +322,8 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
MCOS->EmitLabel(LineEndSym);
+
+ return LineStartSym;
}
/// Utility function to write the encoding to an object writer.
@@ -372,10 +376,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// it with DW_LNS_advance_line.
if (Temp >= DWARF2_LINE_RANGE) {
OS << char(dwarf::DW_LNS_advance_line);
- SmallString<32> Tmp;
- raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
- OS << OSE.str();
+ MCObjectWriter::EncodeSLEB128(LineDelta, OS);
LineDelta = 0;
Temp = 0 - DWARF2_LINE_BASE;
@@ -411,10 +412,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// Otherwise use DW_LNS_advance_pc.
OS << char(dwarf::DW_LNS_advance_pc);
- SmallString<32> Tmp;
- raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
- OS << OSE.str();
+ MCObjectWriter::EncodeULEB128(AddrDelta, OS);
if (NeedCopy)
OS << char(dwarf::DW_LNS_copy);
@@ -430,6 +428,349 @@ void MCDwarfFile::dump() const {
print(dbgs());
}
+// Utility function to write a tuple for .debug_abbrev.
+static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
+ MCOS->EmitULEB128IntValue(Name);
+ MCOS->EmitULEB128IntValue(Form);
+}
+
+// When generating dwarf for assembly source files this emits
+// the data for the .debug_abbrev section, which contains three DIEs.
+static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+
+ // DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty())
+ EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_label);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // Terminate the abbreviations for this compilation unit.
+ MCOS->EmitIntValue(0, 1);
+}
+
+// When generating dwarf for assembly source files this emits the data for
+// the .debug_aranges section, which contains a header and a table of pairs of
+// PointerSize'ed values for the address and size of section(s) with line table
+// entries (just the default .text in our case) and a terminating pair of zeros.
+static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the end of the section that we are creating the dwarf
+ // debugging info to use later in here as part of the expression to calculate
+ // the size of the section for the table.
+ MCOS->SwitchSection(context.getGenDwarfSection());
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS->EmitLabel(SectionEndSym);
+ context.setGenDwarfSectionEndSym(SectionEndSym);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // This will be the length of the .debug_aranges section, first account for
+ // the size of each item in the header (see below where we emit these items).
+ int Length = 4 + 2 + 4 + 1 + 1;
+
+ // Figure the padding after the header before the table of address and size
+ // pairs whose values are PointerSize'ed.
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
+ if (Pad == 2 * AddrSize)
+ Pad = 0;
+ Length += Pad;
+
+ // Add the size of the pair of PointerSize'ed values for the address and size
+ // of the one default .text section we have in the table.
+ Length += 2 * AddrSize;
+ // And the pair of terminating zeros.
+ Length += 2 * AddrSize;
+
+
+ // Emit the header for this section.
+ // The 4 byte length not including the 4 byte value for the length.
+ MCOS->EmitIntValue(Length - 4, 4);
+ // The 2 byte version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+ // The 4 byte offset to the compile unit in the .debug_info from the start
+ // of the .debug_info, it is at the start of that section so this is zero.
+ MCOS->EmitIntValue(0, 4);
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+ // The 1 byte size of a segment descriptor, we use a value of zero.
+ MCOS->EmitIntValue(0, 1);
+ // Align the header with the padding if needed, before we put out the table.
+ for(int i = 0; i < Pad; i++)
+ MCOS->EmitIntValue(0, 1);
+
+ // Now emit the table of pairs of PointerSize'ed values for the section(s)
+ // address and size, in our case just the one default .text section.
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
+ MCOS->EmitAbsValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+
+ // And finally the pair of terminating zeros.
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
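
As a concrete check of the length and padding arithmetic above, assuming a hypothetical 8-byte pointer size:

// Worked example for AddrSize == 8 (illustrative only):
//   header     = 4 + 2 + 4 + 1 + 1       = 12 bytes
//   Pad        = 2*8 - (12 & (2*8 - 1))  = 4 bytes
//   one range  = 2 * 8                   = 16 bytes
//   terminator = 2 * 8                   = 16 bytes
//   Length     = 12 + 4 + 16 + 16        = 48 bytes
// so the 4-byte length field emitted first is Length - 4 = 44.
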
+
+// When generating dwarf for assembly source files this emits the data for
+// the .debug_info section, which contains three parts: the header, the compile_unit
+// DIE and a list of label DIEs.
+static void EmitGenDwarfInfo(MCStreamer *MCOS,
+ const MCSymbol *AbbrevSectionSymbol,
+ const MCSymbol *LineSectionSymbol) {
+ MCContext &context = MCOS->getContext();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+
+ // Create a symbol at the start and end of this section used in here for the
+ // expression to calculate the length in the header.
+ MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoStart);
+ MCSymbol *InfoEnd = context.CreateTempSymbol();
+
+ // First part: the header.
+
+ // The 4 byte total length of the information for this compilation unit, not
+ // including these 4 bytes.
+ const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
+ MCOS->EmitAbsValue(Length, 4);
+
+ // The 2 byte DWARF version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
+ // it is at the start of that section so this is zero.
+ if (AbbrevSectionSymbol) {
+ MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+
+ // Second part: the compile_unit DIE.
+
+ // The DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+
+ // DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section,
+ // which is at the start of that section so this is zero.
+ if (LineSectionSymbol) {
+ MCOS->EmitSymbolValue(LineSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(End, AddrSize);
+
+ // AT_name, the name of the source file. Reconstruct from the first directory
+ // and file table entries.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ if (MCDwarfDirs.size() > 0) {
+ MCOS->EmitBytes(MCDwarfDirs[0], 0);
+ MCOS->EmitBytes("/", 0);
+ }
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_comp_dir, the working directory the assembly was done in.
+ llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory();
+ MCOS->EmitBytes(StringRef(CWD.c_str()), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_APPLE_flags, the command line arguments of the assembler tool.
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty()){
+ MCOS->EmitBytes(DwarfDebugFlags, 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+ }
+
+ // AT_producer, the version of the assembler tool.
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0);
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0);
+ MCOS->EmitBytes(StringRef(")"), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_language, a 2 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
+ // draft has no standard code for assembler.
+ MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
+
+ // Third part: the list of label DIEs.
+
+ // Loop on saved info for dwarf labels and create the DIEs for them.
+ const std::vector<const MCGenDwarfLabelEntry *> &Entries =
+ MCOS->getContext().getMCGenDwarfLabelEntries();
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+
+ // The DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+
+ // AT_name, of the label without any leading underbar.
+ MCOS->EmitBytes(Entry->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_decl_file, index into the file table.
+ MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+
+ // AT_decl_line, source line number.
+ MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+
+ // AT_low_pc, start address of the label.
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_low_pc, AddrSize);
+
+ // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
+ MCOS->EmitIntValue(0, 1);
+
+ // The DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+
+ // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIEs.
+ MCOS->EmitIntValue(0, 1);
+ }
+ // Deallocate the MCGenDwarfLabelEntry classes that saved away the info
+ // for the dwarf labels.
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+ delete Entry;
+ }
+
+ // Add the NULL DIE terminating the Compile Unit DIE's children.
+ MCOS->EmitIntValue(0, 1);
+
+ // Now set the value of the symbol at the end of the info section.
+ MCOS->EmitLabel(InfoEnd);
+}
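The 4-byte initial length emitted at the top of EmitGenDwarfInfo is the span from InfoStart to InfoEnd minus the length field itself. MakeStartMinusEndExpr is not shown in this patch; assuming it builds End - Start - IntVal, an equivalent expression can be put together directly with the MC expression API, as in this sketch (LengthExpr is an illustrative name):

// Sketch only: building a "section length excluding the length field" value
// with the MC expression API of this era.  HeaderBytes would be 4 for the
// 4-byte initial length field used above.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

static const llvm::MCExpr *LengthExpr(llvm::MCContext &Ctx,
                                      const llvm::MCSymbol &Start,
                                      const llvm::MCSymbol &End,
                                      int64_t HeaderBytes) {
  using namespace llvm;
  const MCExpr *EndRef = MCSymbolRefExpr::Create(&End, Ctx);
  const MCExpr *StartRef = MCSymbolRefExpr::Create(&Start, Ctx);
  const MCExpr *Span = MCBinaryExpr::CreateSub(EndRef, StartRef, Ctx);
  return MCBinaryExpr::CreateSub(
      Span, MCConstantExpr::Create(HeaderBytes, Ctx), Ctx);
}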
+
+//
+// When generating dwarf for assembly source files this emits the Dwarf
+// sections.
+//
+void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
+ // Create the dwarf sections in this order (.debug_line already created).
+ MCContext &context = MCOS->getContext();
+ const MCAsmInfo &AsmInfo = context.getAsmInfo();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCSymbol *AbbrevSectionSymbol;
+ if (AsmInfo.doesDwarfRequireRelocationForSectionOffset()) {
+ AbbrevSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(AbbrevSectionSymbol);
+ } else {
+ AbbrevSectionSymbol = NULL;
+ LineSectionSymbol = NULL;
+ }
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // If there are no line table entries then do not emit any section contents.
+ if (context.getMCLineSections().empty())
+ return;
+
+ // Output the data for .debug_aranges section.
+ EmitGenDwarfAranges(MCOS);
+
+ // Output the data for .debug_abbrev section.
+ EmitGenDwarfAbbrev(MCOS);
+
+ // Output the data for .debug_info section.
+ EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol);
+}
+
+//
+// When generating dwarf for assembly source files this is called when the
+// symbol for a label is created. If this symbol is not a temporary and is in
+// the section that dwarf is being generated for, save the needed info to
+// create a dwarf label.
+//
+void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
+ SourceMgr &SrcMgr, SMLoc &Loc) {
+ // We won't create dwarf labels for temporary symbols or symbols not in
+ // the default text section.
+ if (Symbol->isTemporary())
+ return;
+ MCContext &context = MCOS->getContext();
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ return;
+
+ // The dwarf label's name does not have the symbol name's leading
+ // underbar if any.
+ StringRef Name = Symbol->getName();
+ if (Name.startswith("_"))
+ Name = Name.substr(1, Name.size()-1);
+
+ // Get the dwarf file number to be used for the dwarf label.
+ unsigned FileNumber = context.getGenDwarfFileNumber();
+
+ // Finding the line number is the expensive part, which is why we don't just
+ // pass it in: for some symbols we won't create a dwarf label at all.
+ int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
+
+ // We create a temporary symbol for use as the AT_high_pc and AT_low_pc
+ // values so that they don't carry things like an ARM thumb bit from the
+ // original symbol, and so won't end up with a low bit set after
+ // relocation.
+ MCSymbol *Label = context.CreateTempSymbol();
+ MCOS->EmitLabel(Label);
+
+ // Create an entry for the info and add it to the other entries.
+ MCGenDwarfLabelEntry *Entry =
+ new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label);
+ MCOS->getContext().addMCGenDwarfLabelEntry(Entry);
+}
+
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
@@ -445,8 +786,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
MCContext &context = streamer.getContext();
unsigned format = symbolEncoding & 0x0f;
switch (format) {
- default:
- assert(0 && "Unknown Encoding");
+ default: llvm_unreachable("Unknown Encoding");
case dwarf::DW_EH_PE_absptr:
case dwarf::DW_EH_PE_signed:
return context.getAsmInfo().getPointerSize();
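getSizeForEncoding switches only on the low nibble because a DW_EH_PE encoding byte packs the value format in bits 0-3 and pcrel/indirect style modifiers in the upper bits. A minimal sketch of that split (SplitEncoding is an illustrative name):

// Sketch only: how a DW_EH_PE encoding byte decomposes.  The low nibble picks
// the value format (absptr, udata4, sdata8, ...), which is all that matters
// for its size; the upper bits carry pcrel/datarel/indirect style modifiers.
static void SplitEncoding(unsigned Encoding, unsigned &Format,
                          unsigned &Modifiers) {
  Format = Encoding & 0x0f;    // e.g. dwarf::DW_EH_PE_udata4
  Modifiers = Encoding & 0xf0; // e.g. dwarf::DW_EH_PE_pcrel, DW_EH_PE_indirect
}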
@@ -520,6 +860,7 @@ namespace {
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
+ bool IsSignalFrame,
unsigned lsdaEncoding);
MCSymbol *EmitFDE(MCStreamer &streamer,
const MCSymbol &cieStart,
@@ -536,28 +877,40 @@ namespace {
static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
StringRef Prefix) {
if (Streamer.isVerboseAsm()) {
- const char *EncStr = 0;
+ const char *EncStr;
switch (Encoding) {
- default: EncStr = "<unknown encoding>";
- case dwarf::DW_EH_PE_absptr: EncStr = "absptr";
- case dwarf::DW_EH_PE_omit: EncStr = "omit";
- case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel";
- case dwarf::DW_EH_PE_udata4: EncStr = "udata4";
- case dwarf::DW_EH_PE_udata8: EncStr = "udata8";
- case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4";
- case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8";
+ default: EncStr = "<unknown encoding>"; break;
+ case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+ case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
+ case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
+ case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+ case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+ case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+ case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ EncStr = "pcrel udata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ EncStr = "pcrel sdata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ EncStr = "pcrel udata8";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ EncStr = "screl sdata8";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
EncStr = "indirect pcrel udata4";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
EncStr = "indirect pcrel sdata4";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
EncStr = "indirect pcrel udata8";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
EncStr = "indirect pcrel sdata8";
+ break;
}
Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
@@ -639,11 +992,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
}
return;
}
- case MCCFIInstruction::Remember:
+ case MCCFIInstruction::RememberState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
- case MCCFIInstruction::Restore:
+ case MCCFIInstruction::RestoreState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
@@ -655,6 +1008,19 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
Streamer.EmitULEB128IntValue(Reg);
return;
}
+ case MCCFIInstruction::Restore: {
+ unsigned Reg = Instr.getDestination().getReg();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_restore");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ return;
+ }
+ case MCCFIInstruction::Escape:
+ if (VerboseAsm) Streamer.AddComment("Escape bytes");
+ Streamer.EmitBytes(Instr.getValues(), 0);
+ return;
}
llvm_unreachable("Unhandled case in switch");
}
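The new MCCFIInstruction::Restore case works because DW_CFA_restore is one of the CFA opcodes that keep the operation in the high two bits and the operand in the low six, so the register number can be OR'ed straight into the opcode byte. A sketch of just that encoding (EncodeCFARestore is an illustrative name):

// Sketch only: DW_CFA_restore (0xc0) carries the register in its low six
// bits, which is what "dwarf::DW_CFA_restore | Reg" above produces.  Register
// numbers of 64 or more would need DW_CFA_restore_extended instead.
#include <cassert>
#include <stdint.h>

static uint8_t EncodeCFARestore(unsigned Reg) {
  assert(Reg < 64 && "use DW_CFA_restore_extended for larger register numbers");
  return static_cast<uint8_t>(0xc0 | Reg);
}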
@@ -738,8 +1104,8 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// Compact Encoding
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
- if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
- Twine(llvm::utohexstr(Encoding)));
+ if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" +
+ Twine::utohexstr(Encoding));
Streamer.EmitIntValue(Encoding, Size);
@@ -766,6 +1132,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
+ bool IsSignalFrame,
unsigned lsdaEncoding) {
MCContext &context = streamer.getContext();
const MCRegisterInfo &MRI = context.getRegisterInfo();
@@ -808,6 +1175,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
if (lsda)
Augmentation += "L";
Augmentation += "R";
+ if (IsSignalFrame)
+ Augmentation += "S";
streamer.EmitBytes(Augmentation.str(), 0);
}
streamer.EmitIntValue(0, 1);
@@ -967,17 +1336,18 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
namespace {
struct CIEKey {
- static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
- static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false); }
CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
- unsigned LsdaEncoding_) : Personality(Personality_),
- PersonalityEncoding(PersonalityEncoding_),
- LsdaEncoding(LsdaEncoding_) {
+ unsigned LsdaEncoding_, bool IsSignalFrame_) :
+ Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_) {
}
const MCSymbol* Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
+ bool IsSignalFrame;
};
}
@@ -991,17 +1361,17 @@ namespace llvm {
return CIEKey::getTombstoneKey();
}
static unsigned getHashValue(const CIEKey &Key) {
- FoldingSetNodeID ID;
- ID.AddPointer(Key.Personality);
- ID.AddInteger(Key.PersonalityEncoding);
- ID.AddInteger(Key.LsdaEncoding);
- return ID.ComputeHash();
+ return static_cast<unsigned>(hash_combine(Key.Personality,
+ Key.PersonalityEncoding,
+ Key.LsdaEncoding,
+ Key.IsSignalFrame));
}
static bool isEqual(const CIEKey &LHS,
const CIEKey &RHS) {
return LHS.Personality == RHS.Personality &&
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
- LHS.LsdaEncoding == RHS.LsdaEncoding;
+ LHS.LsdaEncoding == RHS.LsdaEncoding &&
+ LHS.IsSignalFrame == RHS.IsSignalFrame;
}
};
}
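The getHashValue change replaces the FoldingSetNodeID accumulation with llvm::hash_combine from llvm/ADT/Hashing.h, which folds a list of hashable fields into one hash_code in a single call. A minimal sketch of the same pattern on a made-up key type:

// Sketch only: hashing a multi-field key with hash_combine, mirroring
// DenseMapInfo<CIEKey>::getHashValue above.
#include "llvm/ADT/Hashing.h"

struct ExampleKey {
  const void *Ptr;
  unsigned A;
  bool Flag;
};

static unsigned hashExampleKey(const ExampleKey &K) {
  return static_cast<unsigned>(llvm::hash_combine(K.Ptr, K.A, K.Flag));
}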
@@ -1016,12 +1386,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
- // FIXME: This emits both the compact unwind and the old CIE/FDE
- // information. Only one of those is needed.
if (IsEH && MOFI->getCompactUnwindSection())
for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
- if (!Frame.CompactUnwindEncoding)
+ if (Frame.CompactUnwindEncoding)
Emitter.EmitCompactUnwind(Streamer, Frame);
}
@@ -1039,11 +1407,12 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = FrameArray[i];
CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
- Frame.LsdaEncoding);
+ Frame.LsdaEncoding, Frame.IsSignalFrame);
const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
if (!CIEStart)
CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.IsSignalFrame,
Frame.LsdaEncoding);
FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index dad2e7ba9878..f9f98e0f730e 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -37,7 +37,7 @@ void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
@@ -48,7 +48,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
return Type;
}
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 12a02a9e9740..171ab4d9bf28 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -7,17 +7,40 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
using namespace llvm;
MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
- Triple::OSType OSType_,
+ uint8_t OSABI_,
uint16_t EMachine_,
bool HasRelocationAddend_)
- : OSType(OSType_), EMachine(EMachine_),
+ : OSABI(OSABI_), EMachine(EMachine_),
HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
}
-MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+/// Default e_flags = 0
+unsigned MCELFObjectTargetWriter::getEFlags() const {
+ return 0;
+}
+
+const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return NULL;
+}
+
+
+void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup,
+ uint64_t &RelocOffset) {
+}
+
+void
+MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ // Sort by the r_offset, just like gnu as does.
+ array_pod_sort(Relocs.begin(), Relocs.end());
}
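array_pod_sort from llvm/ADT/STLExtras.h is a qsort-style helper for trivially copyable elements; with no comparator, as above, it sorts with operator<, which is what leaves the relocations ordered by r_offset. A comparator-based variant would look like the sketch below, where the exact ELFRelocationEntry layout is assumed rather than shown in this patch:

// Sketch only: an explicit comparison function for array_pod_sort, assuming
// ELFRelocationEntry exposes its section offset as r_offset.
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"

static int CompareByOffset(const llvm::ELFRelocationEntry *A,
                           const llvm::ELFRelocationEntry *B) {
  if (A->r_offset < B->r_offset) return -1;
  if (A->r_offset > B->r_offset) return 1;
  return 0;
}
// Usage: array_pod_sort(Relocs.begin(), Relocs.end(), CompareByOffset);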
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 9ada08ea9530..6c4d0e33a115 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,13 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCELFStreamer.h"
#include "MCELF.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
@@ -29,6 +33,123 @@
using namespace llvm;
+namespace {
+class MCELFStreamer : public MCObjectStreamer {
+public:
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler)
+ : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+
+ virtual void EmitFileDirective(StringRef Filename);
+
+ virtual void FinishImpl();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+ /// @}
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+ }
+
+ void SetSectionData() {
+ SetSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionText() {
+ SetSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionBss() {
+ SetSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+};
+}
+
void MCELFStreamer::InitSections() {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -61,7 +182,7 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
return;
}
- assert(0 && "invalid assembler flag!");
+ llvm_unreachable("invalid assembler flag!");
}
void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
@@ -130,10 +251,8 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefinition:
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
- case MCSA_ELF_TypeIndFunction:
case MCSA_IndirectSymbol:
- assert(0 && "Invalid symbol attribute for ELF!");
- break;
+ llvm_unreachable("Invalid symbol attribute for ELF!");
case MCSA_ELF_TypeGnuUniqueObject:
// Ignore for now.
@@ -162,6 +281,10 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCELF::SetType(SD, ELF::STT_FUNC);
break;
+ case MCSA_ELF_TypeIndFunction:
+ MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ break;
+
case MCSA_ELF_TypeObject:
MCELF::SetType(SD, ELF::STT_OBJECT);
break;
@@ -205,10 +328,10 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
if (MCELF::GetBinding(SD) == ELF_STB_Local) {
const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
- ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getBSS());
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getBSS());
Symbol->setSection(*Section);
struct LocalCommon L = {&SD, Size, ByteAlignment};
@@ -266,6 +389,13 @@ void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
+void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ fixSymbolsInTLSFixups(Value);
+ MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace);
+}
+
+
// Add a symbol for the file name of this module. This is the second
// entry in the module's symbol table (the first being the null symbol).
void MCELFStreamer::EmitFileDirective(StringRef Filename) {
@@ -308,6 +438,10 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_ARM_TLSGD:
case MCSymbolRefExpr::VK_ARM_TPOFF:
case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
break;
}
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
@@ -349,7 +483,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCELFStreamer::Finish() {
+void MCELFStreamer::FinishImpl() {
EmitFrames(true);
for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
@@ -372,7 +506,7 @@ void MCELFStreamer::Finish() {
SectData.setAlignment(ByteAlignment);
}
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
deleted file mode 100644
index 10bf77580998..000000000000
--- a/lib/MC/MCELFStreamer.h
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file assembles .s files and emits ELF .o object files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELFSTREAMER_H
-#define LLVM_MC_MCELFSTREAMER_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCAssembler *Assembler)
- : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
-
-
- ~MCELFStreamer() {}
-
- /// @name MCStreamer Interface
- /// @{
-
- virtual void InitSections();
- virtual void ChangeSection(const MCSection *Section);
- virtual void EmitLabel(MCSymbol *Symbol);
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
- virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolType(int Type) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EndCOFFSymbolDef() {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setSize(Value);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0);
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0);
-
- virtual void EmitFileDirective(StringRef Filename);
-
- virtual void Finish();
-
-private:
- virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitInstToData(const MCInst &Inst);
-
- void fixSymbolsInTLSFixups(const MCExpr *expr);
-
- struct LocalCommon {
- MCSymbolData *SD;
- uint64_t Size;
- unsigned ByteAlignment;
- };
- std::vector<LocalCommon> LocalCommons;
-
- SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
- /// @}
- void SetSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind) {
- SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
- }
-
- void SetSectionData() {
- SetSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionText() {
- SetSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC, SectionKind::getText());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionBss() {
- SetSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC, SectionKind::getBSS());
- EmitCodeAlignment(4, 0);
- }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index da297fb1d95a..78801557af3e 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -14,9 +14,11 @@
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,7 +59,8 @@ void MCExpr::print(raw_ostream &OS) const {
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1)
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
@@ -70,7 +73,6 @@ void MCExpr::print(raw_ostream &OS) const {
case MCExpr::Unary: {
const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
switch (UE.getOpcode()) {
- default: assert(0 && "Invalid opcode!");
case MCUnaryExpr::LNot: OS << '!'; break;
case MCUnaryExpr::Minus: OS << '-'; break;
case MCUnaryExpr::Not: OS << '~'; break;
@@ -91,7 +93,6 @@ void MCExpr::print(raw_ostream &OS) const {
}
switch (BE.getOpcode()) {
- default: assert(0 && "Invalid opcode!");
case MCBinaryExpr::Add:
// Print "X-42" instead of "X+-42".
if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
@@ -132,7 +133,7 @@ void MCExpr::print(raw_ostream &OS) const {
}
}
- assert(0 && "Invalid expression kind!");
+ llvm_unreachable("Invalid expression kind!");
}
void MCExpr::dump() const {
@@ -171,7 +172,6 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
- default:
case VK_Invalid: return "<<invalid>>";
case VK_None: return "<<none>>";
@@ -189,18 +189,39 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_TPOFF: return "TPOFF";
case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
+ case VK_SECREL: return "SECREL";
case VK_ARM_PLT: return "(PLT)";
case VK_ARM_GOT: return "(GOT)";
case VK_ARM_GOTOFF: return "(GOTOFF)";
case VK_ARM_TPOFF: return "(tpoff)";
case VK_ARM_GOTTPOFF: return "(gottpoff)";
case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_ARM_TARGET1: return "(target1)";
case VK_PPC_TOC: return "toc";
case VK_PPC_DARWIN_HA16: return "ha16";
case VK_PPC_DARWIN_LO16: return "lo16";
case VK_PPC_GAS_HA16: return "ha";
case VK_PPC_GAS_LO16: return "l";
+ case VK_Mips_GPREL: return "GPREL";
+ case VK_Mips_GOT_CALL: return "GOT_CALL";
+ case VK_Mips_GOT16: return "GOT16";
+ case VK_Mips_GOT: return "GOT";
+ case VK_Mips_ABS_HI: return "ABS_HI";
+ case VK_Mips_ABS_LO: return "ABS_LO";
+ case VK_Mips_TLSGD: return "TLSGD";
+ case VK_Mips_TLSLDM: return "TLSLDM";
+ case VK_Mips_DTPREL_HI: return "DTPREL_HI";
+ case VK_Mips_DTPREL_LO: return "DTPREL_LO";
+ case VK_Mips_GOTTPREL: return "GOTTPREL";
+ case VK_Mips_TPREL_HI: return "TPREL_HI";
+ case VK_Mips_TPREL_LO: return "TPREL_LO";
+ case VK_Mips_GPOFF_HI: return "GPOFF_HI";
+ case VK_Mips_GPOFF_LO: return "GPOFF_LO";
+ case VK_Mips_GOT_DISP: return "GOT_DISP";
+ case VK_Mips_GOT_PAGE: return "GOT_PAGE";
+ case VK_Mips_GOT_OFST: return "GOT_OFST";
}
+ llvm_unreachable("Invalid variant kind");
}
MCSymbolRefExpr::VariantKind
@@ -337,6 +358,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
if (Addrs && (&SecA != &SecB))
Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = 0;
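The new isThumbFunc check is needed because ARM/Thumb interworking marks Thumb code addresses by setting bit 0, so once a Thumb symbol is folded into a plain constant that bit has to be OR'ed into the addend by hand. In isolation, the adjustment is:

// Sketch only: the Thumb interworking adjustment applied above.
static int64_t adjustForThumb(int64_t Addend, bool SymbolIsThumbFunc) {
  // Thumb code addresses carry their mode in bit 0; plain data differences
  // must not be tagged, so only set the bit for Thumb function symbols.
  return SymbolIsThumbFunc ? (Addend | 1) : Addend;
}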
@@ -557,8 +583,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
}
}
- assert(0 && "Invalid assembly expression kind!");
- return false;
+ llvm_unreachable("Invalid assembly expression kind!");
}
const MCSection *MCExpr::FindAssociatedSection() const {
@@ -599,6 +624,5 @@ const MCSection *MCExpr::FindAssociatedSection() const {
}
}
- assert(0 && "Invalid assembly expression kind!");
- return 0;
+ llvm_unreachable("Invalid assembly expression kind!");
}
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 4cb628b395c3..7bbfd2efa136 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -25,6 +25,8 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << "Imm:" << getImm();
else if (isExpr()) {
OS << "Expr:(" << *getExpr() << ")";
+ } else if (isInst()) {
+ OS << "Inst:(" << *getInst() << ")";
} else
OS << "UNDEFINED";
OS << ">";
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 2317a2891f8b..847bcc0a1604 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,8 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -19,11 +21,11 @@ MCInstPrinter::~MCInstPrinter() {
/// getOpcodeName - Return the name of the specified opcode enum (e.g.
/// "MOV32ri") or empty if we can't resolve it.
StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
- return "";
+ return MII.getName(Opcode);
}
void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- assert(0 && "Target should implement this");
+ llvm_unreachable("Target should implement this");
}
void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
deleted file mode 100644
index 3fe8ac72c8ec..000000000000
--- a/lib/MC/MCLoggingStreamer.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-
-class MCLoggingStreamer : public MCStreamer {
- llvm::OwningPtr<MCStreamer> Child;
-
- raw_ostream &OS;
-
-public:
- MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
- : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
-
- void LogCall(const char *Function) {
- OS << Function << "\n";
- }
-
- void LogCall(const char *Function, const Twine &Message) {
- OS << Function << ": " << Message << "\n";
- }
-
- virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
-
- virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
-
- virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
-
- virtual void AddComment(const Twine &T) {
- LogCall("AddComment", T);
- return Child->AddComment(T);
- }
-
- virtual void AddBlankLine() {
- LogCall("AddBlankLine");
- return Child->AddBlankLine();
- }
-
- virtual void ChangeSection(const MCSection *Section) {
- LogCall("ChangeSection");
- return Child->ChangeSection(Section);
- }
-
- virtual void InitSections() {
- LogCall("InitSections");
- return Child->InitSections();
- }
-
- virtual void EmitLabel(MCSymbol *Symbol) {
- LogCall("EmitLabel");
- return Child->EmitLabel(Symbol);
- }
-
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
- LogCall("EmitAssemblerFlag");
- return Child->EmitAssemblerFlag(Flag);
- }
-
- virtual void EmitThumbFunc(MCSymbol *Func) {
- LogCall("EmitThumbFunc");
- return Child->EmitThumbFunc(Func);
- }
-
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- LogCall("EmitAssignment");
- return Child->EmitAssignment(Symbol, Value);
- }
-
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
- LogCall("EmitWeakReference");
- return Child->EmitWeakReference(Alias, Symbol);
- }
-
- virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) {
- LogCall("EmitDwarfAdvanceLineAddr");
- return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
- PointerSize);
- }
-
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
- LogCall("EmitSymbolAttribute");
- return Child->EmitSymbolAttribute(Symbol, Attribute);
- }
-
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- LogCall("EmitSymbolDesc");
- return Child->EmitSymbolDesc(Symbol, DescValue);
- }
-
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- LogCall("BeginCOFFSymbolDef");
- return Child->BeginCOFFSymbolDef(Symbol);
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- LogCall("EmitCOFFSymbolStorageClass");
- return Child->EmitCOFFSymbolStorageClass(StorageClass);
- }
-
- virtual void EmitCOFFSymbolType(int Type) {
- LogCall("EmitCOFFSymbolType");
- return Child->EmitCOFFSymbolType(Type);
- }
-
- virtual void EndCOFFSymbolDef() {
- LogCall("EndCOFFSymbolDef");
- return Child->EndCOFFSymbolDef();
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- LogCall("EmitELFSize");
- return Child->EmitELFSize(Symbol, Value);
- }
-
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- LogCall("EmitCommonSymbol");
- return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- LogCall("EmitLocalCommonSymbol");
- return Child->EmitLocalCommonSymbol(Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0) {
- LogCall("EmitZerofill");
- return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- LogCall("EmitTBSSSymbol");
- return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
- LogCall("EmitBytes");
- return Child->EmitBytes(Data, AddrSpace);
- }
-
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace){
- LogCall("EmitValue");
- return Child->EmitValueImpl(Value, Size, AddrSpace);
- }
-
- virtual void EmitULEB128Value(const MCExpr *Value) {
- LogCall("EmitULEB128Value");
- return Child->EmitULEB128Value(Value);
- }
-
- virtual void EmitSLEB128Value(const MCExpr *Value) {
- LogCall("EmitSLEB128Value");
- return Child->EmitSLEB128Value(Value);
- }
-
- virtual void EmitGPRel32Value(const MCExpr *Value) {
- LogCall("EmitGPRel32Value");
- return Child->EmitGPRel32Value(Value);
- }
-
- virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace) {
- LogCall("EmitFill");
- return Child->EmitFill(NumBytes, FillValue, AddrSpace);
- }
-
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0) {
- LogCall("EmitValueToAlignment");
- return Child->EmitValueToAlignment(ByteAlignment, Value,
- ValueSize, MaxBytesToEmit);
- }
-
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0) {
- LogCall("EmitCodeAlignment");
- return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
- }
-
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {
- LogCall("EmitValueToOffset");
- return Child->EmitValueToOffset(Offset, Value);
- }
-
- virtual void EmitFileDirective(StringRef Filename) {
- LogCall("EmitFileDirective", "FileName:" + Filename);
- return Child->EmitFileDirective(Filename);
- }
-
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- LogCall("EmitDwarfFileDirective",
- "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
- return Child->EmitDwarfFileDirective(FileNo, Filename);
- }
-
- virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
- unsigned Column, unsigned Flags,
- unsigned Isa, unsigned Discriminator,
- StringRef FileName) {
- LogCall("EmitDwarfLocDirective",
- "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
- " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
- " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
- return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
- Isa, Discriminator, FileName);
- }
-
- virtual void EmitInstruction(const MCInst &Inst) {
- LogCall("EmitInstruction");
- return Child->EmitInstruction(Inst);
- }
-
- virtual void EmitRawText(StringRef String) {
- LogCall("EmitRawText", "\"" + String + "\"");
- return Child->EmitRawText(String);
- }
-
- virtual void Finish() {
- LogCall("Finish");
- return Child->Finish();
- }
-
-};
-
-} // end anonymous namespace.
-
-MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
- return new MCLoggingStreamer(Child, OS);
-}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index aa35815dd19c..bc6cf773217c 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -53,23 +53,23 @@ public:
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitCOFFSymbolType(int Type) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EndCOFFSymbolDef() {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -89,7 +89,7 @@ public:
//report_fatal_error("unsupported directive: '.file'");
}
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
};
@@ -140,7 +140,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
- getAssembler().getBackend().HandleAssemblerFlag(Flag);
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
// Do any generic stuff we need to do.
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
@@ -150,14 +150,10 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
- default:
- llvm_unreachable("invalid assembler flag!");
}
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
- // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
-
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
getAssembler().setIsThumbFunc(Symbol);
@@ -215,8 +211,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Protected:
case MCSA_Weak:
case MCSA_Local:
- assert(0 && "Invalid symbol attribute for Mach-O!");
- break;
+ llvm_unreachable("Invalid symbol attribute for Mach-O!");
case MCSA_Global:
SD.setExternal(true);
@@ -377,7 +372,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCMachOStreamer::Finish() {
+void MCMachOStreamer::FinishImpl() {
EmitFrames(true);
// We have to set the fragment atom associations so we can relax properly for
@@ -409,7 +404,7 @@ void MCMachOStreamer::Finish() {
}
}
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index b1d09d945a39..f5631608330c 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCModule.cpp - MCModule implementation --------------------------===//
+//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index a6c0adb6793f..7ff2d1bf641b 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -55,6 +55,7 @@ namespace {
virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
virtual void EmitCOFFSymbolType(int Type) {}
virtual void EndCOFFSymbolDef() {}
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol) {}
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -79,11 +80,12 @@ namespace {
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0) {}
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {}
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) { return false; }
virtual void EmitFileDirective(StringRef Filename) {}
- virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
return false;
}
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -92,7 +94,11 @@ namespace {
StringRef FileName) {}
virtual void EmitInstruction(const MCInst &Inst) {}
- virtual void Finish() {}
+ virtual void FinishImpl() {}
+
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+ }
/// @}
};
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index df8b99d2be6c..b22ae331c9a9 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -56,8 +56,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
TLSThreadInitSection
= Ctx->getMachOSection("__DATA", "__thread_init",
- MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
- SectionKind::getDataRel());
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
CStringSection // .cstring
= Ctx->getMachOSection("__TEXT", "__cstring",
@@ -152,6 +152,24 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
SectionKind::getReadOnly());
// Debug Information.
+ DwarfAccelNamesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_names",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelObjCSection =
+ Ctx->getMachOSection("__DWARF", "__apple_objc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ // 16 character section limit...
+ DwarfAccelNamespaceSection =
+ Ctx->getMachOSection("__DWARF", "__apple_namespac",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelTypesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_types",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
DwarfAbbrevSection =
Ctx->getMachOSection("__DWARF", "__debug_abbrev",
MCSectionMachO::S_ATTR_DEBUG,
@@ -168,10 +186,6 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
Ctx->getMachOSection("__DWARF", "__debug_frame",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubnames",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
MCSectionMachO::S_ATTR_DEBUG,
@@ -207,8 +221,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
@@ -216,8 +230,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -244,10 +258,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
}
}
+ // Solaris requires different flags for .eh_frame to seemingly every other
+ // platform.
+ EHSectionType = ELF::SHT_PROGBITS;
+ EHSectionFlags = ELF::SHF_ALLOC;
+ if (T.getOS() == Triple::Solaris) {
+ if (T.getArch() == Triple::x86_64)
+ EHSectionType = ELF::SHT_X86_64_UNWIND;
+ else
+ EHSectionFlags |= ELF::SHF_WRITE;
+ }
+
+
// ELF
BSSSection =
Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getBSS());
TextSection =
@@ -347,15 +373,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfFrameSection =
Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfStrSection =
- Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getMergeable1ByteCString());
DwarfLocSection =
Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
@@ -390,12 +414,22 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- StaticCtorSection =
- Ctx->getCOFFSection(".ctors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ if (T.getOS() == Triple::Win32) {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".CRT$XCU",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+
+
StaticDtorSection =
Ctx->getCOFFSection(".dtors",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -434,11 +468,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getCOFFSection(".debug_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getCOFFSection(".debug_pubtypes",
COFF::IMAGE_SCN_MEM_DISCARDABLE |
@@ -488,6 +517,12 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
+ TLSDataSection =
+ Ctx->getCOFFSection(".tls$",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
@@ -505,8 +540,12 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
TTypeEncoding = dwarf::DW_EH_PE_absptr;
- EHFrameSection = 0; // Created on demand.
- CompactUnwindSection = 0; // Used only by selected targets.
+ EHFrameSection = 0; // Created on demand.
+ CompactUnwindSection = 0; // Used only by selected targets.
+ DwarfAccelNamesSection = 0; // Used only by selected targets.
+ DwarfAccelObjCSection = 0; // Used only by selected targets.
+ DwarfAccelNamespaceSection = 0; // Used only by selected targets.
+ DwarfAccelTypesSection = 0; // Used only by selected targets.
Triple T(TT);
Triple::ArchType Arch = T.getArch();
@@ -541,8 +580,8 @@ void MCObjectFileInfo::InitEHFrameSection() {
SectionKind::getReadOnly());
else if (Env == IsELF)
EHFrameSection =
- Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
+ Ctx->getELFSection(".eh_frame", EHSectionType,
+ EHSectionFlags,
SectionKind::getDataRel());
else
EHFrameSection =
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index a04ae0812a3e..bad7cfe38a17 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
-
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -105,6 +105,14 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
+void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcStart(Frame);
+}
+
+void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+}
+
void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
MCStreamer::EmitLabel(Symbol);
@@ -164,7 +172,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
MCLineEntry::Make(this, getCurrentSection());
// If this instruction doesn't need relaxation, just emit it as data.
- if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ if (!getAssembler().getBackend().mayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
return;
}
@@ -173,9 +181,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
// possible and emit it as data.
if (getAssembler().getRelaxAll()) {
MCInst Relaxed;
- getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
- while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
- getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ getAssembler().getBackend().relaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().mayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().relaxInstruction(Relaxed, Relaxed);
EmitInstToData(Relaxed);
return;
}
@@ -224,12 +232,12 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
}
-void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
+bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
int64_t Res;
if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
new MCOrgFragment(*Offset, Value, getCurrentSectionData());
- return;
+ return false;
}
MCSymbol *CurrentPos = getContext().CreateTempSymbol();
@@ -241,14 +249,30 @@ void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
- report_fatal_error("expected assembly-time absolute expression");
+ return true;
EmitFill(Res, Value, 0);
+ return false;
}
-void MCObjectStreamer::Finish() {
+// Associate a GPRel32 fixup with the data fragment and resize the data area.
+void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ Value,
+ FK_GPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+void MCObjectStreamer::FinishImpl() {
// Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
if (getContext().hasDwarfFiles())
- MCDwarfFileTable::Emit(this);
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
getAssembler().Finish();
}
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index efe9f68ee22b..030f24793c55 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -33,14 +33,22 @@ void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
}
/// Utility function to encode a ULEB128 value.
-void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS,
+ unsigned Padding) {
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
- if (Value != 0)
+ if (Value != 0 || Padding != 0)
Byte |= 0x80; // Mark this byte to show that more bytes will follow.
OS << char(Byte);
} while (Value != 0);
+
+ // Pad with 0x80 and emit a null byte at the end.
+ if (Padding != 0) {
+ for (; Padding != 1; --Padding)
+ OS << '\x80';
+ OS << '\x00';
+ }
}
bool
@@ -60,6 +68,8 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
const MCSymbolData &DataA = Asm.getSymbolData(SA);
const MCSymbolData &DataB = Asm.getSymbolData(SB);
+ if(!DataA.getFragment() || !DataB.getFragment())
+ return false;
return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
*DataB.getFragment(),
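The EncodeULEB128 change above adds a Padding parameter: while padding is requested, the continuation bit stays set, then 0x80 filler bytes and a final 0x00 are appended so the encoding always occupies a fixed number of bytes. A self-contained sketch of the same algorithm; encodeULEB128 here is an illustrative standalone function, not LLVM's:

    #include <cstdint>
    #include <string>

    // Encode Value as ULEB128; if Padding != 0, append Padding extra bytes
    // (0x80 fillers plus a trailing 0x00) so the field has a fixed width.
    static std::string encodeULEB128(uint64_t Value, unsigned Padding = 0) {
      std::string Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || Padding != 0)
          Byte |= 0x80;              // more bytes follow
        Out.push_back(char(Byte));
      } while (Value != 0);
      if (Padding != 0) {
        for (; Padding != 1; --Padding)
          Out.push_back('\x80');     // filler continuation bytes
        Out.push_back('\x00');       // terminating byte
      }
      return Out;
    }
    // e.g. encodeULEB128(1)    -> "\x01"
    //      encodeULEB128(1, 2) -> "\x81\x80\x00"  (three bytes total)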
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 16487579abfc..2d61cac62585 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -30,6 +29,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -122,6 +122,9 @@ private:
int64_t CppHashLineNumber;
SMLoc CppHashLoc;
+ /// AssemblerDialect. ~0U means unset; use the value provided by MAI.
+ unsigned AssemblerDialect;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@@ -143,9 +146,20 @@ public:
virtual MCAsmLexer &getLexer() { return Lexer; }
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
+ virtual unsigned getAssemblerDialect() {
+ if (AssemblerDialect == ~0U)
+ return MAI.getAssemblerDialect();
+ else
+ return AssemblerDialect;
+ }
+ virtual void setAssemblerDialect(unsigned i) {
+ AssemblerDialect = i;
+ }
- virtual bool Warning(SMLoc L, const Twine &Msg);
- virtual bool Error(SMLoc L, const Twine &Msg);
+ virtual bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+ virtual bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
const AsmToken &Lex();
@@ -171,14 +185,17 @@ private:
void HandleMacroExit();
void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
- bool ShowLine = true) const {
- SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
+ void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
+ SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
+ /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// This returns true on failure.
+ bool ProcessIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \arg Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -225,6 +242,7 @@ private:
bool ParseDirectiveAbort(); // ".abort"
bool ParseDirectiveInclude(); // ".include"
+ bool ParseDirectiveIncbin(); // ".incbin"
bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifdef" or ".ifndef", depending on expect_defined
@@ -295,6 +313,12 @@ public:
&GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
AddDirectiveHandler<
&GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRestore>(".cfi_restore");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame");
// Macro directives.
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -328,6 +352,9 @@ public:
bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
@@ -352,7 +379,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
- CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0) {
+ CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
+ AssemblerDialect(~0U) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -395,21 +423,21 @@ void AsmParser::PrintMacroInstantiations() {
// Print the active macro instantiation stack.
for (std::vector<MacroInstantiation*>::const_reverse_iterator
it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
- PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
- "note");
+ PrintMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
+ "while in macro instantiation");
}
-bool AsmParser::Warning(SMLoc L, const Twine &Msg) {
+bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
if (FatalAssemblerWarnings)
- return Error(L, Msg);
- PrintMessage(L, Msg, "warning");
+ return Error(L, Msg, Ranges);
+ PrintMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
PrintMacroInstantiations();
return false;
}
-bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
HadError = true;
- PrintMessage(L, Msg, "error");
+ PrintMessage(L, SourceMgr::DK_Error, Msg, Ranges);
PrintMacroInstantiations();
return true;
}
@@ -427,6 +455,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
+/// Process the specified .incbin file by searching for it in the include paths,
+/// then emitting the byte contents of the file to the streamer. This
+/// returns true on failure.
+bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ // Pick up the bytes from the file and emit them.
+ getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(),
+ DEFAULT_ADDRSPACE);
+ return false;
+}
+
void AsmParser::JumpToLoc(SMLoc Loc) {
CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
@@ -462,6 +505,17 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
HadError = false;
AsmCond StartingCondState = TheCondState;
+ // If we are generating dwarf for assembly source files, save the initial text
+ // section and generate a .file directive.
+ if (getContext().getGenDwarfForAssembly()) {
+ getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ getContext().setGenDwarfSectionStartSym(SectionStartSym);
+ getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
+ StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier());
+ }
+
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
if (!ParseStatement()) continue;
@@ -501,8 +555,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
- PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
- Sym->getName() + "' not defined", "error", false);
+ PrintMessage(getLexer().getLoc(), SourceMgr::DK_Error,
+ "assembler local symbol '" + Sym->getName() +
+ "' not defined");
}
}
@@ -749,8 +804,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
}
}
- assert(0 && "Invalid expression kind!");
- return 0;
+ llvm_unreachable("Invalid expression kind!");
}
/// ParseExpression - Parse an expression and return it.
@@ -787,7 +841,6 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
if (!ModifiedRes) {
return TokError("invalid modifier '" + getTok().getIdentifier() +
"' (no symbols present)");
- return true;
}
Res = ModifiedRes;
@@ -1036,6 +1089,12 @@ bool AsmParser::ParseStatement() {
// Emit the label.
Out.EmitLabel(Sym);
+ // If we are generating dwarf for assembly source files then gather the
+ // info to make a dwarf label entry for this label if needed.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
+ IDLoc);
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -1163,6 +1222,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveAbort();
if (IDVal == ".include")
return ParseDirectiveInclude();
+ if (IDVal == ".incbin")
+ return ParseDirectiveIncbin();
if (IDVal == ".code16")
return TokError(Twine(IDVal) + " not supported yet");
@@ -1205,7 +1266,19 @@ bool AsmParser::ParseStatement() {
}
OS << "]";
- PrintMessage(IDLoc, OS.str(), "note");
+ PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str());
+ }
+
+ // If we are generating dwarf for assembly source files and the current
+ // section is the initial text section then generate a .loc directive for
+ // the instruction.
+ if (!HadError && getContext().getGenDwarfForAssembly() &&
+ getContext().getGenDwarfSection() == getStreamer().getCurrentSection() ) {
+ getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(),
+ SrcMgr.FindLineNumber(IDLoc, CurBuffer),
+ 0, DWARF2_LINE_DEFAULT_IS_STMT ?
+ DWARF2_FLAG_IS_STMT : 0, 0, 0,
+ StringRef());
}
// If parsing succeeded, match the instruction.
@@ -1294,7 +1367,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
else
- Diag.Print(0, OS);
+ Diag.print(0, OS);
return;
}
@@ -1309,19 +1382,15 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
int LineNo = Parser->CppHashLineNumber - 1 +
(DiagLocLineNo - CppHashLocLineNo);
- SMDiagnostic NewDiag(*Diag.getSourceMgr(),
- Diag.getLoc(),
- Filename,
- LineNo,
- Diag.getColumnNo(),
- Diag.getMessage(),
- Diag.getLineContents(),
- Diag.getShowLine());
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(),
+ Filename, LineNo, Diag.getColumnNo(),
+ Diag.getKind(), Diag.getMessage(),
+ Diag.getLineContents(), Diag.getRanges());
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
else
- NewDiag.Print(0, OS);
+ NewDiag.print(0, OS);
}
bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,
@@ -1458,6 +1527,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
}
Lex();
}
+ // If there weren't any arguments, erase the token vector so everything
+ // else knows that. Leaving around the vestigial empty token list confuses
+ // things.
+ if (MacroArguments.size() == 1 && MacroArguments.back().empty())
+ MacroArguments.clear();
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
@@ -1495,23 +1569,27 @@ void AsmParser::HandleMacroExit() {
ActiveMacros.pop_back();
}
-static void MarkUsed(const MCExpr *Value) {
+static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
switch (Value->getKind()) {
- case MCExpr::Binary:
- MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
- MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
+ return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
break;
+ }
case MCExpr::Target:
case MCExpr::Constant:
- break;
+ return false;
case MCExpr::SymbolRef: {
- static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
- break;
+ const MCSymbol &S = static_cast<const MCSymbolRefExpr*>(Value)->getSymbol();
+ if (S.isVariable())
+ return IsUsedIn(Sym, S.getVariableValue());
+ return &S == Sym;
}
case MCExpr::Unary:
- MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
- break;
+ return IsUsedIn(Sym, static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
}
+
+ llvm_unreachable("Unknown expr kind!");
}
bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
@@ -1522,7 +1600,9 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
if (ParseExpression(Value))
return true;
- MarkUsed(Value);
+ // Note: we don't count b as used in "a = b". This is to allow
+ // a = b
+ // b = c
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in assignment");
@@ -1544,8 +1624,12 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ if (IsUsedIn(Sym, Value))
+ return Error(EqualLoc, "Recursive use of '" + Name + "'");
+ else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
+ else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+ ; // Allow redefinitions of variables that haven't yet been used.
else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
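The assignment hunk above rejects definitions such as "a = a + 1" by walking the right-hand expression for a reference back to the symbol being defined, following through already-defined variable symbols. A toy sketch of the same walk over an invented expression type (not LLVM's MCExpr):

    #include <string>
    #include <vector>

    // Toy expression node: a symbol-reference leaf or an operator with children.
    struct Expr {
      std::string Symbol;            // non-empty => symbol reference leaf
      std::vector<const Expr*> Ops;  // children for unary/binary nodes
    };

    // Returns true if 'Name' is referenced anywhere inside E.
    static bool isUsedIn(const std::string &Name, const Expr &E) {
      if (!E.Symbol.empty())
        return E.Symbol == Name;
      for (const Expr *Op : E.Ops)
        if (isUsedIn(Name, *Op))
          return true;
      return false;
    }
    // "a = a + 1" would be rejected: isUsedIn("a", rhs) is true.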
@@ -1912,6 +1996,7 @@ bool AsmParser::ParseDirectiveOrg() {
CheckForValidSection();
const MCExpr *Offset;
+ SMLoc Loc = getTok().getLoc();
if (ParseExpression(Offset))
return true;
@@ -1931,9 +2016,11 @@ bool AsmParser::ParseDirectiveOrg() {
Lex();
- // FIXME: Only limited forms of relocatable expressions are accepted here, it
- // has to be relative to the current section.
- getStreamer().EmitValueToOffset(Offset, FillExpr);
+ // Only limited forms of relocatable expressions are accepted here; the
+ // offset has to be relative to the current section. The streamer will return
+ // 'true' if the expression wasn't evaluatable.
+ if (getStreamer().EmitValueToOffset(Offset, FillExpr))
+ return Error(Loc, "expected assembly-time absolute expression");
return false;
}
@@ -2178,6 +2265,31 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the included file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
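ParseDirectiveIncbin above strips the surrounding quotes and hands the name to ProcessIncbinFile, which resolves the file against the include search path and streams its raw bytes to the output. A minimal sketch of the byte-copying step using plain iostreams instead of LLVM's SourceMgr and MCStreamer; readAllBytes is an invented helper:

    #include <fstream>
    #include <iterator>
    #include <string>
    #include <vector>

    // Read the whole file into Out; returns false if the file cannot be opened.
    static bool readAllBytes(const std::string &Path, std::vector<char> &Out) {
      std::ifstream In(Path, std::ios::binary);
      if (!In)
        return false;
      Out.assign(std::istreambuf_iterator<char>(In),
                 std::istreambuf_iterator<char>());
      return true;
    }
    // A caller would then emit Out verbatim, much as ProcessIncbinFile does
    // with MCStreamer::EmitBytes.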
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
@@ -2305,7 +2417,8 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
}
/// ParseDirectiveFile
-/// ::= .file [number] string
+/// ::= .file [number] filename
+/// ::= .file number directory filename
bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
@@ -2321,17 +2434,35 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::String))
return TokError("unexpected token in '.file' directive");
- StringRef Filename = getTok().getString();
- Filename = Filename.substr(1, Filename.size()-2);
+ // Usually the directory and filename together, otherwise just the directory.
+ StringRef Path = getTok().getString();
+ Path = Path.substr(1, Path.size()-2);
Lex();
+ StringRef Directory;
+ StringRef Filename;
+ if (getLexer().is(AsmToken::String)) {
+ if (FileNumber == -1)
+ return TokError("explicit path specified, but no file number");
+ Filename = getTok().getString();
+ Filename = Filename.substr(1, Filename.size()-2);
+ Directory = Path;
+ Lex();
+ } else {
+ Filename = Path;
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.file' directive");
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
- if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+ if (getContext().getGenDwarfForAssembly() == true)
+ Error(DirectiveLoc, "input can't have .file dwarf directives when -g is "
+ "used to generate dwarf debug info for assembly code");
+
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
Error(FileNumberLoc, "file number already allocated");
}
@@ -2719,6 +2850,56 @@ bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
return false;
}
+/// ParseDirectiveCFIRestore
+/// ::= .cfi_restore register
+bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRestore(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIEscape
+/// ::= .cfi_escape expression[,...]
+bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ std::string Values;
+ int64_t CurrValue;
+ if (getParser().ParseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+ }
+
+ getStreamer().EmitCFIEscape(Values);
+ return false;
+}
+
+/// ParseDirectiveCFISignalFrame
+/// ::= .cfi_signal_frame
+bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getStreamer().EmitCFISignalFrame();
+
+ return false;
+}
+
/// ParseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index 299d28168948..222f237bfd64 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMMCParser
MCAsmParserExtension.cpp
MCTargetAsmParser.cpp
)
-
-add_llvm_library_dependencies(LLVMMCParser
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 185b5168bd6c..c4cdc3c9f96f 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -45,6 +45,7 @@ class COFFAsmParser : public MCAsmParserExtension {
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
// Win64 EH directives.
AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
@@ -102,6 +103,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseDirectiveScl(StringRef, SMLoc);
bool ParseDirectiveType(StringRef, SMLoc);
bool ParseDirectiveEndef(StringRef, SMLoc);
+ bool ParseDirectiveSecRel32(StringRef, SMLoc);
// Win64 EH directives.
bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
@@ -217,6 +219,21 @@ bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
return false;
}
+bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitCOFFSecRel32(Symbol);
+ return false;
+}
+
bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
StringRef SymbolID;
if (getParser().ParseIdentifier(SymbolID))
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d89112645cd8..ffc400b203f9 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -476,6 +476,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
.Case("common", MCSA_ELF_TypeCommon)
.Case("notype", MCSA_ELF_TypeNoType)
.Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
.Default(MCSA_Invalid);
if (Attr == MCSA_Invalid)
diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt
new file mode 100644
index 000000000000..bcb0febf3323
--- /dev/null
+++ b/lib/MC/MCParser/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/MC/MCParser/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCParser
+parent = MC
+required_libraries = MC Support
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index dceece78ba10..3a3ff147117e 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -25,3 +25,7 @@ SMLoc MCAsmLexer::getLoc() const {
SMLoc AsmToken::getLoc() const {
return SMLoc::getFromPointer(Str.data());
}
+
+SMLoc AsmToken::getEndLoc() const {
+ return SMLoc::getFromPointer(Str.data() + Str.size() - 1);
+}
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 5239ec753e77..3a825f03b776 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -33,8 +33,8 @@ const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
-bool MCAsmParser::TokError(const Twine &Msg) {
- Error(getLexer().getLoc(), Msg);
+bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ Error(getLexer().getLoc(), Msg, Ranges);
return true;
}
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 086c9229bcdc..a770c974380a 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -46,9 +46,9 @@ public:
unsigned MaxBytesToEmit = 0);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
- virtual void Finish();
+ virtual void FinishImpl();
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
@@ -93,9 +93,9 @@ public:
virtual void EmitFileDirective(StringRef Filename) {
report_fatal_error("unsupported directive in pure streamer");
}
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
report_fatal_error("unsupported directive in pure streamer");
- return false;
}
/// @}
@@ -184,9 +184,10 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
-void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return false;
}
void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
@@ -223,10 +224,10 @@ void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCPureStreamer::Finish() {
+void MCPureStreamer::FinishImpl() {
// FIXME: Handle DWARF tables?
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createPureStreamer(MCContext &Context, MCAsmBackend &MAB,
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 3afa22b0d0be..43e62ff89a0f 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
using namespace llvm;
@@ -94,17 +93,18 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
/// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
-void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
- SmallString<32> Tmp;
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace,
+ unsigned Padding) {
+ SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(Value, OSE);
+ MCObjectWriter::EncodeULEB128(Value, OSE, Padding);
EmitBytes(OSE.str(), AddrSpace);
}
/// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
- SmallString<32> Tmp;
+ SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
MCObjectWriter::EncodeSLEB128(Value, OSE);
EmitBytes(OSE.str(), AddrSpace);
@@ -128,6 +128,10 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
AddrSpace);
}
+void MCStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
report_fatal_error("unsupported directive in streamer");
}
@@ -142,8 +146,9 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
}
bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Directory,
StringRef Filename) {
- return getContext().GetDwarfFile(Filename, FileNo) == 0;
+ return getContext().GetDwarfFile(Directory, Filename, FileNo) == 0;
}
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -186,9 +191,8 @@ void MCStreamer::EmitDataRegion() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getDataBeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getDataBeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = Data;
@@ -202,9 +206,8 @@ void MCStreamer::EmitCodeRegion() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getCodeBeginLabelName()) +
- utostr(UniqueCodeBeginSuffix++));
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getCodeBeginLabelName() +
+ Twine(UniqueCodeBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = Code;
@@ -218,9 +221,9 @@ void MCStreamer::EmitJumpTable8Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable8BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable8BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable8;
@@ -234,9 +237,9 @@ void MCStreamer::EmitJumpTable16Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable16BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable16BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable16;
@@ -251,9 +254,9 @@ void MCStreamer::EmitJumpTable32Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable32BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable32BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable32;
@@ -277,8 +280,17 @@ void MCStreamer::EmitCFIStartProc() {
report_fatal_error("Starting a frame before finishing the previous one!");
MCDwarfFrameInfo Frame;
- Frame.Function = LastSymbol;
+ EmitCFIStartProcImpl(Frame);
+ FrameInfos.push_back(Frame);
+ RegionIndicator = Code;
+}
+
+void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) {
+ Frame.Function = LastSymbol;
// If the function is externally visible, we need to create a local
// symbol to avoid relocations.
StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
@@ -288,16 +300,20 @@ void MCStreamer::EmitCFIStartProc() {
Frame.Begin = getContext().CreateTempSymbol();
EmitLabel(Frame.Begin);
}
-
- FrameInfos.push_back(Frame);
- RegionIndicator = Code;
}
void MCStreamer::EmitCFIEndProc() {
EnsureValidFrame();
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- CurFrame->End = getContext().CreateTempSymbol();
- EmitLabel(CurFrame->End);
+ EmitCFIEndProcImpl(*CurFrame);
+}
+
+void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
+ Frame.End = getContext().CreateTempSymbol();
+ EmitLabel(Frame.End);
}
void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
@@ -386,7 +402,7 @@ void MCStreamer::EmitCFIRememberState() {
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::RememberState, Label);
CurFrame->Instructions.push_back(Instruction);
}
@@ -396,7 +412,7 @@ void MCStreamer::EmitCFIRestoreState() {
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::RestoreState, Label);
CurFrame->Instructions.push_back(Instruction);
}
@@ -409,6 +425,30 @@ void MCStreamer::EmitCFISameValue(int64_t Register) {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFIRestore(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label, Register);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIEscape(StringRef Values) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Escape, Label, Values);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISignalFrame() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->IsSignalFrame = true;
+}
+
void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
W64UnwindInfos.push_back(Frame);
CurrentW64UnwindInfo = W64UnwindInfos.back();
@@ -559,6 +599,10 @@ void MCStreamer::EmitWin64EHEndProlog() {
EmitLabel(CurFrame->PrologEnd);
}
+void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ llvm_unreachable("This file format doesn't support this directive");
+}
+
void MCStreamer::EmitFnStart() {
errs() << "Not implemented yet\n";
abort();
@@ -631,3 +675,10 @@ void MCStreamer::EmitW64Tables() {
MCWin64EHUnwindEmitter::Emit(*this);
}
+
+void MCStreamer::Finish() {
+ if (!FrameInfos.empty() && !FrameInfos.back().End)
+ report_fatal_error("Unfinished frame!");
+
+ FinishImpl();
+}
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index c2fad1674aa4..e013e77f58af 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -54,17 +54,14 @@ const MCSymbol &MCSymbol::AliasedSymbol() const {
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
- assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
- "Invalid redefinition!");
this->Value = Value;
// Variables should always be marked as in the same "section" as the value.
const MCSection *Section = Value->FindAssociatedSection();
- if (Section) {
+ if (Section)
setSection(*Section);
- } else {
+ else
setUndefined();
- }
}
void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a9219ad29c65..8e4066c894ba 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
@@ -584,9 +584,16 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// requires the compiler to use .set to absolutize the differences between
// symbols which the compiler knows to be assembly time constants, so we
// don't need to worry about considering symbol differences fully resolved.
+ //
+ // If the file isn't using sub-sections-via-symbols, we can make the
+ // same assumptions about any symbol that we normally make about
+ // assembler locals.
if (!Asm.getBackend().hasReliableSymbolDifference()) {
- if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ if (!SA.isInSection() || &SecA != &SecB ||
+ (!SA.isTemporary() &&
+ FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() &&
+ Asm.getSubsectionsViaSymbols()))
return false;
return true;
}
@@ -628,7 +635,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
}
void MachObjectWriter::WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+ const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
@@ -731,7 +738,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- Asm.WriteSectionData(it, Layout);
+ Asm.writeSectionData(it, Layout);
uint64_t Pad = getPaddingSize(it, Layout);
for (unsigned int i = 0; i < Pad; ++i)
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 348cd4c9ab1b..be4157994c68 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -114,7 +114,7 @@ void SubtargetFeatures::AddFeature(const StringRef String,
// Don't add empty features
if (!String.empty()) {
// Convert to lowercase, prepend flag and add to vector
- Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ Features.push_back(PrependFlag(String.lower(), IsEnabled));
}
}
@@ -154,21 +154,19 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
// Print the CPU table.
errs() << "Available CPUs for this target:\n\n";
for (size_t i = 0; i != CPUTableSize; i++)
- errs() << " " << CPUTable[i].Key
- << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
- << " - " << CPUTable[i].Desc << ".\n";
- errs() << "\n";
-
+ errs() << format(" %-*s - %s.\n",
+ MaxCPULen, CPUTable[i].Key, CPUTable[i].Desc);
+ errs() << '\n';
+
// Print the Feature table.
errs() << "Available features for this target:\n\n";
for (size_t i = 0; i != FeatTableSize; i++)
- errs() << " " << FeatTable[i].Key
- << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
- << " - " << FeatTable[i].Desc << ".\n";
- errs() << "\n";
-
+ errs() << format(" %-*s - %s.\n",
+ MaxFeatLen, FeatTable[i].Key, FeatTable[i].Desc);
+ errs() << '\n';
+
errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
- << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
std::exit(1);
}
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index b15e225fc2a3..f706cac8d36c 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -22,8 +22,10 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -33,8 +35,6 @@
#include "llvm/Support/TimeValue.h"
-#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
-
#include <cstdio>
using namespace llvm;
@@ -128,8 +128,9 @@ public:
typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
typedef DenseMap<MCSection const *, COFFSection *> section_map;
+ llvm::OwningPtr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
+
// Root level file contents.
- bool Is64Bit;
COFF::header Header;
sections Sections;
symbols Symbols;
@@ -139,7 +140,7 @@ public:
section_map SectionMap;
symbol_map SymbolMap;
- WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
~WinCOFFObjectWriter();
COFFSymbol *createSymbol(StringRef Name);
@@ -281,6 +282,7 @@ StringTable::StringTable() {
// The string table data begins with the length of the entire string table
// including the length header. Allocate space for this header.
Data.resize(4);
+ update_length();
}
size_t StringTable::size() const {
@@ -313,13 +315,13 @@ size_t StringTable::insert(llvm::StringRef String) {
//------------------------------------------------------------------------------
// WinCOFFObjectWriter class implementation
-WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS)
: MCObjectWriter(OS, true)
- , Is64Bit(is64Bit) {
+ , TargetObjectWriter(MOTW) {
memset(&Header, 0, sizeof(Header));
- Is64Bit ? Header.Machine = COFF::IMAGE_FILE_MACHINE_AMD64
- : Header.Machine = COFF::IMAGE_FILE_MACHINE_I386;
+ Header.Machine = TargetObjectWriter->getMachine();
}
WinCOFFObjectWriter::~WinCOFFObjectWriter() {
@@ -694,30 +696,13 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
if (CrossSection)
FixupKind = FK_PCRel_4;
- switch (FixupKind) {
- case FK_PCRel_4:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
- : COFF::IMAGE_REL_I386_REL32;
- // FIXME: Can anyone explain what this does other than adjust for the size
- // of the offset?
+ Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ if (Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32 ||
+ Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32)
FixedValue += 4;
- break;
- case FK_Data_4:
- case X86::reloc_signed_4byte:
- Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
- : COFF::IMAGE_REL_I386_DIR32;
- break;
- case FK_Data_8:
- if (Is64Bit)
- Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
- else
- llvm_unreachable("unsupported relocation type");
- break;
- default:
- llvm_unreachable("unsupported relocation type");
- }
coff_section->Relocations.push_back(Reloc);
}
@@ -798,9 +783,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
}
if (Sec->Relocations.size() > 0) {
- Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+ if (RelocationsOverflow) {
+ // Signal overflow by setting NumberOfRelocations to max value. Actual
+ // size is found in reloc #0. Microsoft tools understand this.
+ Sec->Header.NumberOfRelocations = 0xffff;
+ } else {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ }
Sec->Header.PointerToRelocations = offset;
+ if (RelocationsOverflow) {
+ // Reloc #0 will contain actual count, so make room for it.
+ offset += COFF::RelocationSize;
+ }
+
offset += COFF::RelocationSize * Sec->Relocations.size();
for (relocations::iterator cr = Sec->Relocations.begin(),
@@ -835,8 +833,12 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
MCAssembler::const_iterator j, je;
for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
- if ((*i)->Number != -1)
+ if ((*i)->Number != -1) {
+ if ((*i)->Relocations.size() >= 0xffff) {
+ (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ }
WriteSectionHeader((*i)->Header);
+ }
for (i = Sections.begin(), ie = Sections.end(),
j = Asm.begin(), je = Asm.end();
@@ -849,13 +851,23 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.WriteSectionData(j, Layout);
+ Asm.writeSectionData(j, Layout);
}
if ((*i)->Relocations.size() > 0) {
assert(OS.tell() == (*i)->Header.PointerToRelocations &&
"Section::PointerToRelocations is insane!");
+ if ((*i)->Relocations.size() >= 0xffff) {
+ // In case of overflow, write the actual relocation count as the first
+ // relocation, including the synthetic reloc itself (hence the + 1).
+ COFF::relocation r;
+ r.VirtualAddress = (*i)->Relocations.size() + 1;
+ r.SymbolTableIndex = 0;
+ r.Type = 0;
+ WriteRelocation(r);
+ }
+
for (relocations::const_iterator k = (*i)->Relocations.begin(),
ke = (*i)->Relocations.end();
k != ke; k++) {
@@ -877,11 +889,16 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
+MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
+ Machine(Machine_) {
+}
+
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
- return new WinCOFFObjectWriter(OS, is64Bit);
+ MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS) {
+ return new WinCOFFObjectWriter(MOTW, OS);
}
}
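The relocation-overflow hunks above work around COFF's 16-bit NumberOfRelocations field: at 0xffff or more relocations the field is pinned to 0xffff, the section is flagged with IMAGE_SCN_LNK_NRELOC_OVFL, and the true count (including the synthetic entry itself) is written into the VirtualAddress of relocation #0. A small sketch of the header-side arithmetic, using invented stand-in types for the COFF structures:

    #include <cstdint>

    // Invented stand-ins for the COFF section header fields involved.
    struct SectionHeader {
      uint16_t NumberOfRelocations;
      uint32_t Characteristics;
    };

    constexpr uint32_t IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000;

    // Returns the count to store in relocation #0's VirtualAddress, or 0 if
    // no overflow record is needed.
    static uint32_t applyRelocationCount(SectionHeader &H, uint32_t RelocCount) {
      if (RelocCount >= 0xffff) {
        H.NumberOfRelocations = 0xffff;
        H.Characteristics |= IMAGE_SCN_LNK_NRELOC_OVFL;
        return RelocCount + 1;   // real count includes the synthetic reloc #0
      }
      H.NumberOfRelocations = static_cast<uint16_t>(RelocCount);
      return 0;
    }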
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 7409daf39085..67dc649d4913 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -25,13 +25,13 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
@@ -60,6 +60,7 @@ public:
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
virtual void EmitCOFFSymbolType(int Type);
virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -77,7 +78,7 @@ public:
virtual void EmitFileDirective(StringRef Filename);
virtual void EmitInstruction(const MCInst &Instruction);
virtual void EmitWin64EHHandlerData();
- virtual void Finish();
+ virtual void FinishImpl();
private:
virtual void EmitInstToFragment(const MCInst &Inst) {
@@ -251,7 +252,6 @@ void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
default:
llvm_unreachable("unsupported attribute");
- break;
}
}
@@ -293,6 +293,16 @@ void WinCOFFStreamer::EndCOFFSymbolDef() {
CurSymbol = NULL;
}
+void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol)
+{
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ MCSymbolRefExpr::Create (Symbol, getContext ()),
+ FK_SecRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
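EmitCOFFSecRel32 above follows the same pattern as EmitGPRel32Value in MCObjectStreamer: append four zero placeholder bytes to the current data fragment and record a fixup pointing at them, leaving the object writer to patch the value during layout. A generic sketch of that placeholder-plus-fixup pattern with invented types (Fixup, DataFragment):

    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct Fixup { std::size_t Offset; std::string Target; int Kind; };

    struct DataFragment {
      std::vector<uint8_t> Contents;
      std::vector<Fixup> Fixups;

      // Remember where a 4-byte value will go and reserve the space now;
      // the actual bytes are filled in when relocations are resolved.
      void emitFixup32(const std::string &Target, int Kind) {
        Fixups.push_back(Fixup{Contents.size(), Target, Kind});
        Contents.resize(Contents.size() + 4, 0);
      }
    };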
void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
llvm_unreachable("not implemented");
}
@@ -389,9 +399,9 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
}
-void WinCOFFStreamer::Finish() {
+void WinCOFFStreamer::FinishImpl() {
EmitW64Tables();
- MCObjectStreamer::Finish();
+ MCObjectStreamer::FinishImpl();
}
namespace llvm
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index e2eaff53c1f1..c5f15bafcfba 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,14 +13,15 @@
#include "llvm/Object/Archive.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace object;
-namespace {
-const StringRef Magic = "!<arch>\n";
+static const char *Magic = "!<arch>\n";
+namespace {
struct ArchiveMemberHeader {
char Name[16];
char LastModified[12];
@@ -32,7 +33,11 @@ struct ArchiveMemberHeader {
///! Get the name without looking up long names.
StringRef getName() const {
- char EndCond = Name[0] == '/' ? ' ' : '/';
+ char EndCond;
+ if (Name[0] == '/' || Name[0] == '#')
+ EndCond = ' ';
+ else
+ EndCond = '/';
StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond);
if (end == StringRef::npos)
end = sizeof(Name);
@@ -47,12 +52,30 @@ struct ArchiveMemberHeader {
return ret.getZExtValue();
}
};
+}
-const ArchiveMemberHeader *ToHeader(const char *base) {
+static const ArchiveMemberHeader *ToHeader(const char *base) {
return reinterpret_cast<const ArchiveMemberHeader *>(base);
}
+
+
+static bool isInternalMember(const ArchiveMemberHeader &amh) {
+ const char *internals[] = {
+ "/",
+ "//",
+ "#_LLVM_SYM_TAB_#"
+ };
+
+ StringRef name = amh.getName();
+ for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) {
+ if (name == internals[i])
+ return true;
+ }
+ return false;
}
+void Archive::anchor() { }
+
Archive::Child Archive::Child::getNext() const {
size_t SpaceToSkip = sizeof(ArchiveMemberHeader) +
ToHeader(Data.data())->getSize();
@@ -101,6 +124,11 @@ error_code Archive::Child::getName(StringRef &Result) const {
return object_error::parse_failed;
Result = addr;
return object_error::success;
+ } else if (name.startswith("#1/")) {
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ Result = Data.substr(0, name_size.getZExtValue());
+ return object_error::success;
}
// It's a simple name.
if (name[name.size() - 1] == '/')
@@ -111,14 +139,27 @@ error_code Archive::Child::getName(StringRef &Result) const {
}
uint64_t Archive::Child::getSize() const {
- return ToHeader(Data.data())->getSize();
+ uint64_t size = ToHeader(Data.data())->getSize();
+ // Don't include attached name.
+ StringRef name = ToHeader(Data.data())->getName();
+ if (name.startswith("#1/")) {
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ size -= name_size.getZExtValue();
+ }
+ return size;
}
MemoryBuffer *Archive::Child::getBuffer() const {
StringRef name;
if (getName(name)) return NULL;
- return MemoryBuffer::getMemBuffer(Data.substr(sizeof(ArchiveMemberHeader),
- getSize()),
+ int size = sizeof(ArchiveMemberHeader);
+ if (name.startswith("#1/")) {
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ size += name_size.getZExtValue();
+ }
+ return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()),
name,
false);
}
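The Archive::Child changes above add handling for BSD-style "#1/<len>" member names, where the real name occupies the first <len> bytes of the member data right after the header, so getName, getSize and getBuffer all have to strip that prefix. A standalone sketch of the split, with invented helper names:

    #include <cstdlib>
    #include <string>

    // Given the header name field and the member payload, recover the real
    // name and the payload with the embedded name removed ("#1/<len>" scheme).
    static bool splitBSDName(const std::string &NameField,
                             const std::string &Payload,
                             std::string &Name, std::string &Data) {
      if (NameField.compare(0, 3, "#1/") != 0)
        return false;
      std::size_t Len = std::strtoul(NameField.c_str() + 3, nullptr, 10);
      if (Len > Payload.size())
        return false;
      Name = Payload.substr(0, Len);
      Data = Payload.substr(Len);
      return true;
    }
    // "#1/20" means the 20 bytes after the header are the name; the member's
    // reported size and buffer exclude them, as in getSize()/getBuffer() above.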
@@ -133,8 +174,7 @@ error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
}
Archive::Archive(MemoryBuffer *source, error_code &ec)
- : Binary(Binary::isArchive, source)
- , StringTable(Child(this, StringRef(0, 0))) {
+ : Binary(Binary::ID_Archive, source) {
// Check for sufficient magic.
if (!source || source->getBufferSize()
< (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
@@ -143,30 +183,90 @@ Archive::Archive(MemoryBuffer *source, error_code &ec)
return;
}
- // Get the string table. It's the 3rd member.
- child_iterator StrTable = begin_children();
+ // Get the special members.
+ child_iterator i = begin_children(false);
child_iterator e = end_children();
- for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {}
- // Check to see if there were 3 members, or the 3rd member wasn't named "//".
- StringRef name;
- if (StrTable != e && !StrTable->getName(name) && name == "//")
- StringTable = StrTable;
+ if (i != e) ++i; // Nobody cares about the first member.
+ if (i != e) {
+ SymbolTable = i;
+ ++i;
+ }
+ if (i != e) {
+ StringTable = i;
+ }
ec = object_error::success;
}
-Archive::child_iterator Archive::begin_children() const {
- const char *Loc = Data->getBufferStart() + Magic.size();
+Archive::child_iterator Archive::begin_children(bool skip_internal) const {
+ const char *Loc = Data->getBufferStart() + strlen(Magic);
size_t Size = sizeof(ArchiveMemberHeader) +
ToHeader(Loc)->getSize();
- return Child(this, StringRef(Loc, Size));
+ Child c(this, StringRef(Loc, Size));
+ // Skip internals at the beginning of an archive.
+ if (skip_internal && isInternalMember(*ToHeader(Loc)))
+ return c.getNext();
+ return c;
}
Archive::child_iterator Archive::end_children() const {
return Child(this, StringRef(0, 0));
}
-namespace llvm {
+error_code Archive::Symbol::getName(StringRef &Result) const {
+ Result =
+ StringRef(Parent->SymbolTable->getBuffer()->getBufferStart() + StringIndex);
+ return object_error::success;
+}
+
+error_code Archive::Symbol::getMember(child_iterator &Result) const {
+ const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ const char *offsets = buf + 4;
+ buf += 4 + (member_count * 4); // Skip offsets.
+ const char *indicies = buf + 4;
+
+ uint16_t offsetindex =
+ *(reinterpret_cast<const support::ulittle16_t*>(indicies)
+ + SymbolIndex);
+
+ uint32_t offset = *(reinterpret_cast<const support::ulittle32_t*>(offsets)
+ + (offsetindex - 1));
-} // end namespace llvm
+ const char *Loc = Parent->getData().begin() + offset;
+ size_t Size = sizeof(ArchiveMemberHeader) +
+ ToHeader(Loc)->getSize();
+ Result = Child(Parent, StringRef(Loc, Size));
+
+ return object_error::success;
+}
+
+Archive::Symbol Archive::Symbol::getNext() const {
+ Symbol t(*this);
+ // Go to one past next null.
+ t.StringIndex =
+ Parent->SymbolTable->getBuffer()->getBuffer().find('\0', t.StringIndex) + 1;
+ ++t.SymbolIndex;
+ return t;
+}
+
+Archive::symbol_iterator Archive::begin_symbols() const {
+ const char *buf = SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (symbol_count * 2); // Skip indices.
+ uint32_t string_start_offset =
+ buf - SymbolTable->getBuffer()->getBufferStart();
+ return symbol_iterator(Symbol(this, 0, string_start_offset));
+}
+
+Archive::symbol_iterator Archive::end_symbols() const {
+ const char *buf = SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ return symbol_iterator(
+ Symbol(this, symbol_count, 0));
+}
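begin_symbols, end_symbols and Symbol::getMember above read the archive symbol table in the layout this code expects: a 32-bit little-endian member count, one 32-bit offset per member, a 32-bit symbol count, one 16-bit 1-based member index per symbol, then the NUL-separated symbol names. A bounds-check-free sketch of the same lookup, with invented helper names:

    #include <cstdint>
    #include <vector>

    static uint32_t readLE32(const uint8_t *P) {
      return uint32_t(P[0]) | uint32_t(P[1]) << 8 |
             uint32_t(P[2]) << 16 | uint32_t(P[3]) << 24;
    }
    static uint16_t readLE16(const uint8_t *P) {
      return uint16_t(P[0]) | uint16_t(P[1]) << 8;
    }

    // Layout read by getMember()/begin_symbols() above:
    //   u32 member_count, u32 offsets[member_count],
    //   u32 symbol_count, u16 indices[symbol_count] (1-based into offsets),
    //   NUL-terminated symbol names.
    // Returns the archive offset of the member defining symbol SymbolIndex.
    static uint32_t memberOffsetForSymbol(const std::vector<uint8_t> &Tab,
                                          uint32_t SymbolIndex) {
      const uint8_t *Buf = Tab.data();
      uint32_t MemberCount = readLE32(Buf);
      const uint8_t *Offsets = Buf + 4;
      const uint8_t *Indices = Buf + 4 + MemberCount * 4 + 4;
      uint16_t OffsetIndex = readLE16(Indices + SymbolIndex * 2);
      return readLE32(Offsets + (OffsetIndex - 1) * 4);
    }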
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 86eb51a01646..c20fc0cc399d 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMObject
Object.cpp
ObjectFile.cpp
)
-
-add_llvm_library_dependencies(LLVMObject
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 750c34d12a18..bd27a56e73b9 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFF.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -98,24 +99,10 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
StringRef &Result) const {
const coff_symbol *symb = toSymb(Symb);
- // Check for string table entry. First 4 bytes are 0.
- if (symb->Name.Offset.Zeroes == 0) {
- uint32_t Offset = symb->Name.Offset.Offset;
- if (error_code ec = getString(Offset, Result))
- return ec;
- return object_error::success;
- }
-
- if (symb->Name.ShortName[7] == 0)
- // Null terminated, let ::strlen figure out the length.
- Result = StringRef(symb->Name.ShortName);
- else
- // Not null terminated, use all 8 bytes.
- Result = StringRef(symb->Name.ShortName, 8);
- return object_error::success;
+ return getSymbolName(symb, Result);
}
-error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
+error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
const coff_section *Section = NULL;
@@ -127,7 +114,7 @@ error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = Section->VirtualAddress + symb->Value;
+ Result = Section->PointerToRawData + symb->Value;
else
Result = symb->Value;
return object_error::success;
@@ -145,23 +132,21 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = reinterpret_cast<uintptr_t>(base() +
- Section->PointerToRawData +
- symb->Value);
+ Result = Section->VirtualAddress + symb->Value;
else
- Result = reinterpret_cast<uintptr_t>(base() + symb->Value);
+ Result = symb->Value;
return object_error::success;
}
error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Result) const {
+ SymbolRef::Type &Result) const {
const coff_symbol *symb = toSymb(Symb);
Result = SymbolRef::ST_Other;
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
- Result = SymbolRef::ST_External;
+ Result = SymbolRef::ST_Unknown;
} else {
- if (symb->Type.ComplexType == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
+ if (symb->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
Result = SymbolRef::ST_Function;
} else {
char Type;
@@ -175,10 +160,27 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::isSymbolGlobal(DataRefImpl Symb,
- bool &Result) const {
+error_code COFFObjectFile::getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
- Result = (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ Result = SymbolRef::SF_None;
+
+ // TODO: Correctly set SF_FormatSpecific, SF_ThreadLocal, SF_Common
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
+ Result |= SymbolRef::SF_Undefined;
+
+ // TODO: These are certainly too restrictive.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ Result |= SymbolRef::SF_Global;
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+ Result |= SymbolRef::SF_Weak;
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_ABSOLUTE)
+ Result |= SymbolRef::SF_Absolute;
+
return object_error::success;
}
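getSymbolFlags() condenses the COFF storage class and section number into the generic SymbolRef flag bits, replacing the old isSymbolGlobal() predicate. A sketch of how a format-agnostic client might consume those bits; it assumes the SymbolRef::getFlags()/getName() accessors this interface implies and is not part of the patch:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

// Walk every symbol and report the flag bits filled in above.
static void printSymbolFlags(ObjectFile *Obj) {
  error_code ec;
  for (symbol_iterator I = Obj->begin_symbols(), E = Obj->end_symbols();
       I != E; I.increment(ec)) {
    if (ec) return;
    StringRef Name;
    uint32_t Flags;
    if (I->getName(Name) || I->getFlags(Flags))
      continue;
    outs() << Name;
    if (Flags & SymbolRef::SF_Undefined) outs() << " undefined";
    if (Flags & SymbolRef::SF_Global)    outs() << " global";
    if (Flags & SymbolRef::SF_Weak)      outs() << " weak";
    if (Flags & SymbolRef::SF_Absolute)  outs() << " absolute";
    outs() << "\n";
  }
}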
@@ -233,7 +235,9 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
Result = 'w';
return object_error::success; // Don't do ::toupper.
- } else
+ } else if (symb->Value != 0) // Check for common symbols.
+ ret = 'c';
+ else
ret = 'u';
break;
case COFF::IMAGE_SYM_ABSOLUTE:
@@ -269,9 +273,18 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
- bool &Result) const {
- Result = false;
+error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED)
+ Result = end_sections();
+ else {
+ const coff_section *sec = 0;
+ if (error_code ec = getSection(symb->SectionNumber, sec)) return ec;
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = section_iterator(SectionRef(Sec, this));
+ }
return object_error::success;
}
@@ -287,24 +300,7 @@ error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- StringRef name;
- if (sec->Name[7] == 0)
- // Null terminated, let ::strlen figure out the length.
- name = sec->Name;
- else
- // Not null terminated, use all 8 bytes.
- name = StringRef(sec->Name, 8);
-
- // Check for string table entry. First byte is '/'.
- if (name[0] == '/') {
- uint32_t Offset;
- name.substr(1).getAsInteger(10, Offset);
- if (error_code ec = getString(Offset, name))
- return ec;
- }
-
- Result = name;
- return object_error::success;
+ return getSectionName(sec, Result);
}
error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
@@ -324,16 +320,10 @@ error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- // The only thing that we need to verify is that the contents is contained
- // within the file bounds. We don't need to make sure it doesn't cover other
- // data, as there's nothing that says that is not allowed.
- uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
- uintptr_t con_end = con_start + sec->SizeOfRawData;
- if (con_end >= uintptr_t(Data->getBufferEnd()))
- return object_error::parse_failed;
- Result = StringRef(reinterpret_cast<const char*>(con_start),
- sec->SizeOfRawData);
- return object_error::success;
+ ArrayRef<uint8_t> Res;
+ error_code EC = getSectionContents(sec, Res);
+ Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
+ return EC;
}
error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
@@ -366,12 +356,33 @@ error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec,
return object_error::success;
}
+error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = true;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = false;
+ return object_error::success;
+}
+
error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
const coff_section *sec = toSec(Sec);
const coff_symbol *symb = toSymb(Symb);
- const coff_section *symb_sec;
+ const coff_section *symb_sec = 0;
if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
if (symb_sec == sec)
Result = true;
@@ -383,7 +394,6 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(ret));
if (sec->NumberOfRelocations == 0)
ret.p = 0;
else
@@ -395,7 +405,6 @@ relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(ret));
if (sec->NumberOfRelocations == 0)
ret.p = 0;
else
@@ -408,7 +417,12 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
}
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
- : ObjectFile(Binary::isCOFF, Object, ec) {
+ : ObjectFile(Binary::ID_COFF, Object, ec)
+ , Header(0)
+ , SectionTable(0)
+ , SymbolTable(0)
+ , StringTable(0)
+ , StringTableSize(0) {
// Check that we at least have enough room for a header.
if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
@@ -421,7 +435,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
if (!checkSize(Data, ec, 0x3c + 8)) return;
- HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ HeaderStart = *reinterpret_cast<const ulittle16_t *>(base() + 0x3c);
// Check the PE header. ("PE\0\0")
if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
ec = object_error::parse_failed;
@@ -443,28 +457,30 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
Header->NumberOfSections * sizeof(coff_section)))
return;
- SymbolTable =
- reinterpret_cast<const coff_symbol *>(base()
- + Header->PointerToSymbolTable);
- if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
- Header->NumberOfSymbols * sizeof(coff_symbol)))
- return;
+ if (Header->PointerToSymbolTable != 0) {
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
- // Find string table.
- StringTable = reinterpret_cast<const char *>(base())
- + Header->PointerToSymbolTable
- + Header->NumberOfSymbols * sizeof(coff_symbol);
- if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
- return;
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
- StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
- if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
- return;
- // Check that the string table is null terminated if has any in it.
- if (StringTableSize < 4
- || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
- ec = object_error::parse_failed;
- return;
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
}
ec = object_error::success;
@@ -472,7 +488,6 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
symbol_iterator COFFObjectFile::begin_symbols() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SymbolTable);
return symbol_iterator(SymbolRef(ret, this));
}
@@ -480,21 +495,44 @@ symbol_iterator COFFObjectFile::begin_symbols() const {
symbol_iterator COFFObjectFile::end_symbols() const {
// The symbol table ends where the string table begins.
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(StringTable);
return symbol_iterator(SymbolRef(ret, this));
}
+symbol_iterator COFFObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+symbol_iterator COFFObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+StringRef COFFObjectFile::getLoadName() const {
+ // COFF does not have this field.
+ return "";
+}
+
+
section_iterator COFFObjectFile::begin_sections() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable);
return section_iterator(SectionRef(ret, this));
}
section_iterator COFFObjectFile::end_sections() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
return section_iterator(SectionRef(ret, this));
}
@@ -525,6 +563,11 @@ unsigned COFFObjectFile::getArch() const {
}
}
+error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
+ Res = Header;
+ return object_error::success;
+}
+
error_code COFFObjectFile::getSection(int32_t index,
const coff_section *&Result) const {
// Check for special index values.
@@ -553,13 +596,69 @@ error_code COFFObjectFile::getString(uint32_t offset,
error_code COFFObjectFile::getSymbol(uint32_t index,
const coff_symbol *&Result) const {
- if (index > 0 && index < Header->NumberOfSymbols)
+ if (index < Header->NumberOfSymbols)
Result = SymbolTable + index;
else
return object_error::parse_failed;
return object_error::success;
}
+error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
+ StringRef &Res) const {
+ // Check for string table entry. First 4 bytes are 0.
+ if (symbol->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symbol->Name.Offset.Offset;
+ if (error_code ec = getString(Offset, Res))
+ return ec;
+ return object_error::success;
+ }
+
+ if (symbol->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Res = StringRef(symbol->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Res = StringRef(symbol->Name.ShortName, 8);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+ StringRef &Res) const {
+ StringRef Name;
+ if (Sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Name = Sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ Name = StringRef(Sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (Name[0] == '/') {
+ uint32_t Offset;
+ if (Name.substr(1).getAsInteger(10, Offset))
+ return object_error::parse_failed;
+ if (error_code ec = getString(Offset, Name))
+ return ec;
+ }
+
+ Res = Name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
+ // The only thing that we need to verify is that the contents is contained
+ // within the file bounds. We don't need to make sure it doesn't cover other
+ // data, as there's nothing that says that is not allowed.
+ uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+ uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
+ if (ConEnd > uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
+ Sec->SizeOfRawData);
+ return object_error::success;
+}
+
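The DataRefImpl overload of getSectionContents() is now a thin wrapper over the raw coff_section helper above, with the bounds check kept in one place. A usage sketch through the generic SectionRef interface (illustrative only, not part of the patch):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

// Print each section's name and raw size as seen through SectionRef, which
// forwards to the COFF-specific helpers above.
static void printSections(ObjectFile *Obj) {
  error_code ec;
  for (section_iterator I = Obj->begin_sections(), E = Obj->end_sections();
       I != E; I.increment(ec)) {
    if (ec) return;
    StringRef Name, Contents;
    if (I->getName(Name) || I->getContents(Contents))
      continue;
    outs() << Name << ": " << Contents.size() << " bytes\n";
  }
}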
const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
return reinterpret_cast<const coff_relocation*>(Rel.p);
}
@@ -575,6 +674,11 @@ error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = toRel(Rel)->VirtualAddress;
return object_error::success;
}
+error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
const coff_relocation* R = toRel(Rel);
@@ -584,7 +688,7 @@ error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
return object_error::success;
}
error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const {
+ uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
Res = R->Type;
return object_error::success;
@@ -658,7 +762,6 @@ error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
const coff_symbol *symb = 0;
if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec;
DataRefImpl sym;
- ::memset(&sym, 0, sizeof(sym));
sym.p = reinterpret_cast<uintptr_t>(symb);
StringRef symname;
if (error_code ec = getSymbolName(sym, symname)) return ec;
@@ -666,6 +769,16 @@ error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
return object_error::success;
}
+error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Result) const {
+ report_fatal_error("getLibraryNext not implemented in COFFObjectFile");
+}
+
+error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Result) const {
+ report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
+}
+
namespace llvm {
ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 257d08cadff3..ab5f8108af13 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -7,1405 +7,44 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ELFObjectFile class.
+// Part of the ELFObjectFile class implementation.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <limits>
-#include <utility>
+#include "llvm/Object/ELF.h"
-using namespace llvm;
-using namespace object;
-
-// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
-namespace {
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelperCommon {
- typedef support::detail::packed_endian_specific_integral
- <uint16_t, target_endianness, support::aligned> Elf_Half;
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Word;
- typedef support::detail::packed_endian_specific_integral
- <int32_t, target_endianness, support::aligned> Elf_Sword;
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Xword;
- typedef support::detail::packed_endian_specific_integral
- <int64_t, target_endianness, support::aligned> Elf_Sxword;
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct ELFDataTypeTypedefHelper;
-
-/// ELF 32bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, false>
- : ELFDataTypeTypedefHelperCommon<target_endianness> {
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Addr;
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Off;
-};
-
-/// ELF 64bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, true>
- : ELFDataTypeTypedefHelperCommon<target_endianness>{
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Addr;
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Off;
-};
-}
-
-// I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
-
- // Section header.
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Shdr_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Word sh_name; // Section name (index into string table)
- Elf_Word sh_type; // Section type (SHT_*)
- Elf_Word sh_flags; // Section flags (SHF_*)
- Elf_Addr sh_addr; // Address where section is to be loaded
- Elf_Off sh_offset; // File offset of section data, in bytes
- Elf_Word sh_size; // Size of section, in bytes
- Elf_Word sh_link; // Section type-specific header table index link
- Elf_Word sh_info; // Section type-specific extra information
- Elf_Word sh_addralign;// Section address alignment
- Elf_Word sh_entsize; // Size of records contained within the section
-};
-
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Word sh_name; // Section name (index into string table)
- Elf_Word sh_type; // Section type (SHT_*)
- Elf_Xword sh_flags; // Section flags (SHF_*)
- Elf_Addr sh_addr; // Address where section is to be loaded
- Elf_Off sh_offset; // File offset of section data, in bytes
- Elf_Xword sh_size; // Size of section, in bytes
- Elf_Word sh_link; // Section type-specific header table index link
- Elf_Word sh_info; // Section type-specific extra information
- Elf_Xword sh_addralign;// Section address alignment
- Elf_Xword sh_entsize; // Size of records contained within the section
-};
-
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
-
- /// @brief Get the number of entities this section contains if it has any.
- unsigned getEntityCount() const {
- if (sh_entsize == 0)
- return 0;
- return sh_size / sh_entsize;
- }
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Sym_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Word st_name; // Symbol name (index into string table)
- Elf_Addr st_value; // Value or address associated with the symbol
- Elf_Word st_size; // Size of the symbol
- unsigned char st_info; // Symbol's type and binding attributes
- unsigned char st_other; // Must be zero; reserved
- Elf_Half st_shndx; // Which section (header table index) it's defined in
-};
-
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Word st_name; // Symbol name (index into string table)
- unsigned char st_info; // Symbol's type and binding attributes
- unsigned char st_other; // Must be zero; reserved
- Elf_Half st_shndx; // Which section (header table index) it's defined in
- Elf_Addr st_value; // Value or address associated with the symbol
- Elf_Xword st_size; // Size of the symbol
-};
-
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
- using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
-
- // These accessors and mutators correspond to the ELF32_ST_BIND,
- // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
- unsigned char getBinding() const { return st_info >> 4; }
- unsigned char getType() const { return st_info & 0x0f; }
- void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
- void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
- void setBindingAndType(unsigned char b, unsigned char t) {
- st_info = (b << 4) + (t & 0x0f);
- }
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
-struct Elf_Rel_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Word r_info; // Symbol table index and type of relocation to apply
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Xword r_info; // Symbol table index and type of relocation to apply
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Word r_info; // Symbol table index and type of relocation to apply
- Elf_Sword r_addend; // Compute value for relocatable field by adding this
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Xword r_info; // Symbol table index and type of relocation to apply
- Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
-};
-
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
-struct Elf_Rel_Impl;
-
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, true, isRela>
- : Elf_Rel_Base<target_endianness, true, isRela> {
- using Elf_Rel_Base<target_endianness, true, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
-
- // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
- // and ELF64_R_INFO macros defined in the ELF specification:
- uint64_t getSymbol() const { return (r_info >> 32); }
- unsigned char getType() const {
- return (unsigned char) (r_info & 0xffffffffL);
- }
- void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(uint64_t s, unsigned char t) {
- r_info = (s << 32) + (t&0xffffffffL);
- }
-};
-
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, false, isRela>
- : Elf_Rel_Base<target_endianness, false, isRela> {
- using Elf_Rel_Base<target_endianness, false, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
-
- // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
- // and ELF32_R_INFO macros defined in the ELF specification:
- uint32_t getSymbol() const { return (r_info >> 8); }
- unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
- void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(uint32_t s, unsigned char t) {
- r_info = (s << 8) + t;
- }
-};
+namespace llvm {
-}
+using namespace object;
namespace {
-template<support::endianness target_endianness, bool is64Bits>
-class ELFObjectFile : public ObjectFile {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
-
- typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
- typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
-
- struct Elf_Ehdr {
- unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
- Elf_Half e_type; // Type of file (see ET_*)
- Elf_Half e_machine; // Required architecture for this file (see EM_*)
- Elf_Word e_version; // Must be equal to 1
- Elf_Addr e_entry; // Address to jump to in order to start program
- Elf_Off e_phoff; // Program header table's file offset, in bytes
- Elf_Off e_shoff; // Section header table's file offset, in bytes
- Elf_Word e_flags; // Processor-specific flags
- Elf_Half e_ehsize; // Size of ELF header, in bytes
- Elf_Half e_phentsize;// Size of an entry in the program header table
- Elf_Half e_phnum; // Number of entries in the program header table
- Elf_Half e_shentsize;// Size of an entry in the section header table
- Elf_Half e_shnum; // Number of entries in the section header table
- Elf_Half e_shstrndx; // Section header table index of section name
- // string table
- bool checkMagic() const {
- return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
- }
- unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
- unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
- };
-
- typedef SmallVector<const Elf_Shdr*, 1> Sections_t;
- typedef DenseMap<unsigned, unsigned> IndexMap_t;
- typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
-
- const Elf_Ehdr *Header;
- const Elf_Shdr *SectionHeaderTable;
- const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
- const Elf_Shdr *dot_strtab_sec; // Symbol header string table.
- Sections_t SymbolTableSections;
- IndexMap_t SymbolTableSectionsIndexMap;
- DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
-
- /// @brief Map sections to an array of relocation sections that reference
- /// them sorted by section index.
- RelocMap_t SectionRelocMap;
-
- /// @brief Get the relocation section that contains \a Rel.
- const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
- return getSection(Rel.w.b);
- }
-
- void validateSymbol(DataRefImpl Symb) const;
- bool isRelocationHasAddend(DataRefImpl Rel) const;
- template<typename T>
- const T *getEntry(uint16_t Section, uint32_t Entry) const;
- template<typename T>
- const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
- const Elf_Sym *getSymbol(DataRefImpl Symb) const;
- const Elf_Shdr *getSection(DataRefImpl index) const;
- const Elf_Shdr *getSection(uint32_t index) const;
- const Elf_Rel *getRel(DataRefImpl Rel) const;
- const Elf_Rela *getRela(DataRefImpl Rela) const;
- const char *getString(uint32_t section, uint32_t offset) const;
- const char *getString(const Elf_Shdr *section, uint32_t offset) const;
- error_code getSymbolName(const Elf_Sym *Symb, StringRef &Res) const;
-
-protected:
- virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
- virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
- virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const;
- virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const;
-
- virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
- virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
- virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
- virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
- virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
- bool &Result) const;
- virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
- virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
-
- virtual error_code getRelocationNext(DataRefImpl Rel,
- RelocationRef &Res) const;
- virtual error_code getRelocationAddress(DataRefImpl Rel,
- uint64_t &Res) const;
- virtual error_code getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Res) const;
- virtual error_code getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const;
- virtual error_code getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const;
- virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
- int64_t &Res) const;
- virtual error_code getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const;
-
-public:
- ELFObjectFile(MemoryBuffer *Object, error_code &ec);
- virtual symbol_iterator begin_symbols() const;
- virtual symbol_iterator end_symbols() const;
- virtual section_iterator begin_sections() const;
- virtual section_iterator end_sections() const;
-
- virtual uint8_t getBytesInAddress() const;
- virtual StringRef getFileFormatName() const;
- virtual unsigned getArch() const;
-
- uint64_t getNumSections() const;
- uint64_t getStringTableIndex() const;
- ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
- const Elf_Shdr *getSection(const Elf_Sym *symb) const;
-};
-} // end namespace
-
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>
- ::validateSymbol(DataRefImpl Symb) const {
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
- // FIXME: We really need to do proper error handling in the case of an invalid
- // input file. Because we don't use exceptions, I think we'll just pass
- // an error object around.
- if (!( symb
- && SymbolTableSection
- && symb >= (const Elf_Sym*)(base()
- + SymbolTableSection->sh_offset)
- && symb < (const Elf_Sym*)(base()
- + SymbolTableSection->sh_offset
- + SymbolTableSection->sh_size)))
- // FIXME: Proper error handling.
- report_fatal_error("Symb must point to a valid symbol!");
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNext(DataRefImpl Symb,
- SymbolRef &Result) const {
- validateSymbol(Symb);
- const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
-
- ++Symb.d.a;
- // Check to see if we are at the end of this symbol table.
- if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
- // We are at the end. If there are other symbol tables, jump to them.
- ++Symb.d.b;
- Symb.d.a = 1; // The 0th symbol in ELF is fake.
- // Otherwise return the terminator.
- if (Symb.d.b >= SymbolTableSections.size()) {
- Symb.d.a = std::numeric_limits<uint32_t>::max();
- Symb.d.b = std::numeric_limits<uint32_t>::max();
- }
- }
-
- Result = SymbolRef(Symb, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(DataRefImpl Symb,
- StringRef &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- return getSymbolName(symb, Result);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolTableIndex(const Elf_Sym *symb) const {
- if (symb->st_shndx == ELF::SHN_XINDEX)
- return ExtendedSymbolTable.lookup(symb);
- return symb->st_shndx;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>
- ::getSection(const Elf_Sym *symb) const {
- if (symb->st_shndx == ELF::SHN_XINDEX)
- return getSection(ExtendedSymbolTable.lookup(symb));
- if (symb->st_shndx >= ELF::SHN_LORESERVE)
- return 0;
- return getSection(symb->st_shndx);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolOffset(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section;
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON:
- // Undefined symbols have no address yet.
- case ELF::SHN_UNDEF:
- Result = UnknownAddressOrSize;
- return object_error::success;
- case ELF::SHN_ABS:
- Result = symb->st_value;
- return object_error::success;
- default: Section = getSection(symb);
- }
-
- switch (symb->getType()) {
- case ELF::STT_SECTION:
- Result = Section ? Section->sh_addr : UnknownAddressOrSize;
- return object_error::success;
- case ELF::STT_FUNC:
- case ELF::STT_OBJECT:
- case ELF::STT_NOTYPE:
- Result = symb->st_value;
- return object_error::success;
- default:
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolAddress(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section;
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON: // Fall through.
- // Undefined symbols have no address yet.
- case ELF::SHN_UNDEF:
- Result = UnknownAddressOrSize;
- return object_error::success;
- case ELF::SHN_ABS:
- Result = reinterpret_cast<uintptr_t>(base()+symb->st_value);
- return object_error::success;
- default: Section = getSection(symb);
+ std::pair<unsigned char, unsigned char>
+ getElfArchType(MemoryBuffer *Object) {
+ if (Object->getBufferSize() < ELF::EI_NIDENT)
+ return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+ return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+ , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
}
- const uint8_t* addr = base();
- if (Section)
- addr += Section->sh_offset;
- switch (symb->getType()) {
- case ELF::STT_SECTION:
- Result = reinterpret_cast<uintptr_t>(addr);
- return object_error::success;
- case ELF::STT_FUNC: // Fall through.
- case ELF::STT_OBJECT: // Fall through.
- case ELF::STT_NOTYPE:
- addr += symb->st_value;
- Result = reinterpret_cast<uintptr_t>(addr);
- return object_error::success;
- default:
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSize(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- if (symb->st_size == 0)
- Result = UnknownAddressOrSize;
- Result = symb->st_size;
- return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNMTypeChar(DataRefImpl Symb,
- char &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section = getSection(symb);
-
- char ret = '?';
+// Creates an in-memory object-file by default: createELFObjectFile(Buffer)
+ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
- if (Section) {
- switch (Section->sh_type) {
- case ELF::SHT_PROGBITS:
- case ELF::SHT_DYNAMIC:
- switch (Section->sh_flags) {
- case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
- ret = 't'; break;
- case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
- ret = 'd'; break;
- case ELF::SHF_ALLOC:
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
- ret = 'r'; break;
- }
- break;
- case ELF::SHT_NOBITS: ret = 'b';
- }
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, true>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
+ ELFObjectFile<support::little, true> *result =
+ new ELFObjectFile<support::little, true>(Object, ec);
+ return result;
}
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_UNDEF:
- if (ret == '?')
- ret = 'U';
- break;
- case ELF::SHN_ABS: ret = 'a'; break;
- case ELF::SHN_COMMON: ret = 'c'; break;
- }
-
- switch (symb->getBinding()) {
- case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
- case ELF::STB_WEAK:
- if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF)
- ret = 'w';
- else
- if (symb->getType() == ELF::STT_OBJECT)
- ret = 'V';
- else
- ret = 'W';
- }
-
- if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
- StringRef name;
- if (error_code ec = getSymbolName(Symb, name))
- return ec;
- Result = StringSwitch<char>(name)
- .StartsWith(".debug", 'N')
- .StartsWith(".note", 'n')
- .Default('?');
- return object_error::success;
- }
-
- Result = ret;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) {
- Result = SymbolRef::ST_External;
- return object_error::success;
- }
-
- switch (symb->getType()) {
- case ELF::STT_FUNC:
- Result = SymbolRef::ST_Function;
- break;
- case ELF::STT_OBJECT:
- Result = SymbolRef::ST_Data;
- break;
- default:
- Result = SymbolRef::ST_Other;
- break;
- }
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolGlobal(DataRefImpl Symb,
- bool &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- Result = symb->getBinding() == ELF::STB_GLOBAL;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolInternal(DataRefImpl Symb,
- bool &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- if ( symb->getType() == ELF::STT_FILE
- || symb->getType() == ELF::STT_SECTION)
- Result = true;
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
- const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
- sec += Header->e_shentsize;
- Sec.p = reinterpret_cast<intptr_t>(sec);
- Result = SectionRef(Sec, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(DataRefImpl Sec,
- StringRef &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAddress(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_addr;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionSize(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_size;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(DataRefImpl Sec,
- StringRef &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- const char *start = (const char*)base() + sec->sh_offset;
- Result = StringRef(start, sec->sh_size);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAlignment(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_addralign;
- return object_error::success;
+ report_fatal_error("Buffer is not an ELF object file!");
}
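createELFObjectFile() dispatches on the (EI_CLASS, EI_DATA) pair returned by getElfArchType() to pick one of the four ELFObjectFile instantiations. A small driver sketch showing how a caller typically reaches these factories; the file handling here is illustrative and not part of the patch:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  OwningPtr<MemoryBuffer> Buf;
  if (error_code ec = MemoryBuffer::getFile(argv[1], Buf)) {
    errs() << argv[1] << ": " << ec.message() << "\n";
    return 1;
  }
  // createObjectFile() sniffs the magic and hands the buffer to
  // createELFObjectFile(), createCOFFObjectFile(), or the Mach-O factory.
  OwningPtr<ObjectFile> Obj(ObjectFile::createObjectFile(Buf.take()));
  if (Obj.get() == 0) {
    errs() << argv[1] << ": unrecognized object file\n";
    return 1;
  }
  outs() << Obj->getFileFormatName() << "\n";
  return 0;
}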
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionText(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & ELF::SHF_EXECINSTR)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionData(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
- && sec->sh_type == ELF::SHT_PROGBITS)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionBSS(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
- && sec->sh_type == ELF::SHT_NOBITS)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::sectionContainsSymbol(DataRefImpl Sec,
- DataRefImpl Symb,
- bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelBegin(DataRefImpl Sec) const {
- DataRefImpl RelData;
- memset(&RelData, 0, sizeof(RelData));
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
- if (sec != 0 && ittr != SectionRelocMap.end()) {
- RelData.w.a = getSection(ittr->second[0])->sh_info;
- RelData.w.b = ittr->second[0];
- RelData.w.c = 0;
- }
- return relocation_iterator(RelocationRef(RelData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelEnd(DataRefImpl Sec) const {
- DataRefImpl RelData;
- memset(&RelData, 0, sizeof(RelData));
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
- if (sec != 0 && ittr != SectionRelocMap.end()) {
- // Get the index of the last relocation section for this section.
- std::size_t relocsecindex = ittr->second[ittr->second.size() - 1];
- const Elf_Shdr *relocsec = getSection(relocsecindex);
- RelData.w.a = relocsec->sh_info;
- RelData.w.b = relocsecindex;
- RelData.w.c = relocsec->sh_size / relocsec->sh_entsize;
- }
- return relocation_iterator(RelocationRef(RelData, this));
-}
-
-// Relocations
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationNext(DataRefImpl Rel,
- RelocationRef &Result) const {
- ++Rel.w.c;
- const Elf_Shdr *relocsec = getSection(Rel.w.b);
- if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) {
- // We have reached the end of the relocations for this section. See if there
- // is another relocation section.
- typename RelocMap_t::mapped_type relocseclist =
- SectionRelocMap.lookup(getSection(Rel.w.a));
-
- // Do a binary search for the current reloc section index (which must be
- // present). Then get the next one.
- typename RelocMap_t::mapped_type::const_iterator loc =
- std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b);
- ++loc;
-
- // If there is no next one, don't do anything. The ++Rel.w.c above sets Rel
- // to the end iterator.
- if (loc != relocseclist.end()) {
- Rel.w.b = *loc;
- Rel.w.a = 0;
- }
- }
- Result = RelocationRef(Rel, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Result) const {
- uint32_t symbolIdx;
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- symbolIdx = getRel(Rel)->getSymbol();
- break;
- }
- case ELF::SHT_RELA : {
- symbolIdx = getRela(Rel)->getSymbol();
- break;
- }
- }
- DataRefImpl SymbolData;
- IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link);
- if (it == SymbolTableSectionsIndexMap.end())
- report_fatal_error("Relocation symbol table not found!");
- SymbolData.d.a = symbolIdx;
- SymbolData.d.b = it->second;
- Result = SymbolRef(SymbolData, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAddress(DataRefImpl Rel,
- uint64_t &Result) const {
- uint64_t offset;
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- offset = getRel(Rel)->r_offset;
- break;
- }
- case ELF::SHT_RELA : {
- offset = getRela(Rel)->r_offset;
- break;
- }
- }
-
- Result = offset;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationType(DataRefImpl Rel,
- uint32_t &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- Result = getRel(Rel)->getType();
- break;
- }
- case ELF::SHT_RELA : {
- Result = getRela(Rel)->getType();
- break;
- }
- }
- return object_error::success;
-}
-
-#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
- case ELF::enum: res = #enum; break;
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- uint8_t type;
- StringRef res;
- switch (sec->sh_type) {
- default :
- return object_error::parse_failed;
- case ELF::SHT_REL : {
- type = getRel(Rel)->getType();
- break;
- }
- case ELF::SHT_RELA : {
- type = getRela(Rel)->getType();
- break;
- }
- }
- switch (Header->e_machine) {
- case ELF::EM_X86_64:
- switch (type) {
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
- default:
- res = "Unknown";
- }
- break;
- case ELF::EM_386:
- switch (type) {
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
- default:
- res = "Unknown";
- }
- break;
- default:
- res = "Unknown";
- }
- Result.append(res.begin(), res.end());
- return object_error::success;
-}
-
-#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAdditionalInfo(DataRefImpl Rel,
- int64_t &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- Result = 0;
- return object_error::success;
- }
- case ELF::SHT_RELA : {
- Result = getRela(Rel)->r_addend;
- return object_error::success;
- }
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- uint8_t type;
- StringRef res;
- int64_t addend = 0;
- uint16_t symbol_index = 0;
- switch (sec->sh_type) {
- default :
- return object_error::parse_failed;
- case ELF::SHT_REL : {
- type = getRel(Rel)->getType();
- symbol_index = getRel(Rel)->getSymbol();
- // TODO: Read implicit addend from section data.
- break;
- }
- case ELF::SHT_RELA : {
- type = getRela(Rel)->getType();
- symbol_index = getRela(Rel)->getSymbol();
- addend = getRela(Rel)->r_addend;
- break;
- }
- }
- const Elf_Sym *symb = getEntry<Elf_Sym>(sec->sh_link, symbol_index);
- StringRef symname;
- if (error_code ec = getSymbolName(symb, symname))
- return ec;
- switch (Header->e_machine) {
- case ELF::EM_X86_64:
- switch (type) {
- case ELF::R_X86_64_32S:
- res = symname;
- break;
- case ELF::R_X86_64_PC32: {
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << symname << (addend < 0 ? "" : "+") << addend << "-P";
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- }
- break;
- default:
- res = "Unknown";
- }
- break;
- default:
- res = "Unknown";
- }
- if (Result.empty())
- Result.append(res.begin(), res.end());
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
- , error_code &ec)
- : ObjectFile(Binary::isELF, Object, ec)
- , SectionHeaderTable(0)
- , dot_shstrtab_sec(0)
- , dot_strtab_sec(0) {
- Header = reinterpret_cast<const Elf_Ehdr *>(base());
-
- if (Header->e_shoff == 0)
- return;
-
- SectionHeaderTable =
- reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
- uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
- if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
- <= base() + Data->getBufferSize()))
- // FIXME: Proper error handling.
- report_fatal_error("Section table goes past end of file!");
-
-
- // To find the symbol tables we walk the section table to find SHT_SYMTAB.
- const Elf_Shdr* SymbolTableSectionHeaderIndex = 0;
- const Elf_Shdr* sh = reinterpret_cast<const Elf_Shdr*>(SectionHeaderTable);
- for (uint64_t i = 0, e = getNumSections(); i != e; ++i) {
- if (sh->sh_type == ELF::SHT_SYMTAB_SHNDX) {
- if (SymbolTableSectionHeaderIndex)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .symtab_shndx!");
- SymbolTableSectionHeaderIndex = sh;
- }
- if (sh->sh_type == ELF::SHT_SYMTAB) {
- SymbolTableSectionsIndexMap[i] = SymbolTableSections.size();
- SymbolTableSections.push_back(sh);
- }
- if (sh->sh_type == ELF::SHT_REL || sh->sh_type == ELF::SHT_RELA) {
- SectionRelocMap[getSection(sh->sh_info)].push_back(i);
- }
- ++sh;
- }
-
- // Sort section relocation lists by index.
- for (typename RelocMap_t::iterator i = SectionRelocMap.begin(),
- e = SectionRelocMap.end(); i != e; ++i) {
- std::sort(i->second.begin(), i->second.end());
- }
-
- // Get string table sections.
- dot_shstrtab_sec = getSection(getStringTableIndex());
- if (dot_shstrtab_sec) {
-    // Verify that the last byte in the string table is a null terminator.
- if (((const char*)base() + dot_shstrtab_sec->sh_offset)
- [dot_shstrtab_sec->sh_size - 1] != 0)
- // FIXME: Proper error handling.
- report_fatal_error("String table must end with a null terminator!");
- }
-
- // Merge this into the above loop.
- for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
- *e = i + getNumSections() * Header->e_shentsize;
- i != e; i += Header->e_shentsize) {
- const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
- if (sh->sh_type == ELF::SHT_STRTAB) {
- StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
- if (SectionName == ".strtab") {
- if (dot_strtab_sec != 0)
- // FIXME: Proper error handling.
- report_fatal_error("Already found section named .strtab!");
- dot_strtab_sec = sh;
- const char *dot_strtab = (const char*)base() + sh->sh_offset;
- if (dot_strtab[sh->sh_size - 1] != 0)
- // FIXME: Proper error handling.
- report_fatal_error("String table must end with a null terminator!");
- }
- }
- }
-
- // Build symbol name side-mapping if there is one.
- if (SymbolTableSectionHeaderIndex) {
- const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
- SymbolTableSectionHeaderIndex->sh_offset);
- error_code ec;
- for (symbol_iterator si = begin_symbols(),
- se = end_symbols(); si != se; si.increment(ec)) {
- if (ec)
- report_fatal_error("Fewer extended symbol table entries than symbols!");
- if (*ShndxTable != ELF::SHN_UNDEF)
- ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable;
- ++ShndxTable;
- }
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_symbols() const {
- DataRefImpl SymbolData;
- memset(&SymbolData, 0, sizeof(SymbolData));
- if (SymbolTableSections.size() == 0) {
- SymbolData.d.a = std::numeric_limits<uint32_t>::max();
- SymbolData.d.b = std::numeric_limits<uint32_t>::max();
- } else {
- SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
- SymbolData.d.b = 0;
- }
- return symbol_iterator(SymbolRef(SymbolData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_symbols() const {
- DataRefImpl SymbolData;
- memset(&SymbolData, 0, sizeof(SymbolData));
- SymbolData.d.a = std::numeric_limits<uint32_t>::max();
- SymbolData.d.b = std::numeric_limits<uint32_t>::max();
- return symbol_iterator(SymbolRef(SymbolData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_sections() const {
- DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
- return section_iterator(SectionRef(ret, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_sections() const {
- DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base()
- + Header->e_shoff
- + (Header->e_shentsize*getNumSections()));
- return section_iterator(SectionRef(ret, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
- return is64Bits ? 8 : 4;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getFileFormatName() const {
- switch(Header->e_ident[ELF::EI_CLASS]) {
- case ELF::ELFCLASS32:
- switch(Header->e_machine) {
- case ELF::EM_386:
- return "ELF32-i386";
- case ELF::EM_X86_64:
- return "ELF32-x86-64";
- case ELF::EM_ARM:
- return "ELF32-arm";
- default:
- return "ELF32-unknown";
- }
- case ELF::ELFCLASS64:
- switch(Header->e_machine) {
- case ELF::EM_386:
- return "ELF64-i386";
- case ELF::EM_X86_64:
- return "ELF64-x86-64";
- default:
- return "ELF64-unknown";
- }
- default:
- // FIXME: Proper error handling.
- report_fatal_error("Invalid ELFCLASS!");
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
- switch(Header->e_machine) {
- case ELF::EM_386:
- return Triple::x86;
- case ELF::EM_X86_64:
- return Triple::x86_64;
- case ELF::EM_ARM:
- return Triple::arm;
- default:
- return Triple::UnknownArch;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
- if (Header->e_shnum == ELF::SHN_UNDEF)
- return SectionHeaderTable->sh_size;
- return Header->e_shnum;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint64_t
-ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
- if (Header->e_shnum == ELF::SHN_UNDEF) {
- if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
- return SectionHeaderTable->sh_link;
- if (Header->e_shstrndx >= getNumSections())
- return 0;
- }
- return Header->e_shstrndx;
-}
-
-
-template<support::endianness target_endianness, bool is64Bits>
-template<typename T>
-inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section,
- uint32_t Entry) const {
- return getEntry<T>(getSection(Section), Entry);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-template<typename T>
-inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section,
- uint32_t Entry) const {
- return reinterpret_cast<const T *>(
- base()
- + Section->sh_offset
- + (Entry * Section->sh_entsize));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
-ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
- return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel *
-ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const {
- return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela *
-ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const {
- return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
- const Elf_Shdr *sec = getSection(Symb.d.b);
- if (sec->sh_type != ELF::SHT_SYMTAB || sec->sh_type != ELF::SHT_DYNSYM)
- // FIXME: Proper error handling.
- report_fatal_error("Invalid symbol table section!");
- return sec;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
- if (index == 0)
- return 0;
- if (!SectionHeaderTable || index >= getNumSections())
- // FIXME: Proper error handling.
- report_fatal_error("Invalid section index!");
-
- return reinterpret_cast<const Elf_Shdr *>(
- reinterpret_cast<const char *>(SectionHeaderTable)
- + (index * Header->e_shentsize));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(uint32_t section,
- ELF::Elf32_Word offset) const {
- return getString(getSection(section), offset);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(const Elf_Shdr *section,
- ELF::Elf32_Word offset) const {
- assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
- if (offset >= section->sh_size)
- // FIXME: Proper error handling.
- report_fatal_error("Symbol name offset outside of string table!");
- return (const char *)base() + section->sh_offset + offset;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(const Elf_Sym *symb,
- StringRef &Result) const {
- if (symb->st_name == 0) {
- const Elf_Shdr *section = getSection(symb);
- if (!section)
- Result = "";
- else
- Result = getString(dot_shstrtab_sec, section->sh_name);
- return object_error::success;
- }
-
- // Use the default symbol table name section.
- Result = getString(dot_strtab_sec, symb->st_name);
- return object_error::success;
-}
-
-// EI_CLASS, EI_DATA.
-static std::pair<unsigned char, unsigned char>
-getElfArchType(MemoryBuffer *Object) {
- if (Object->getBufferSize() < ELF::EI_NIDENT)
- return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
- return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
- , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
-}
-
-namespace llvm {
-
- ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
- std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
- error_code ec;
- if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, false>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, false>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, true>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, true>(Object, ec);
- // FIXME: Proper error handling.
- report_fatal_error("Not an ELF object file!");
- }
-
} // end namespace llvm
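The ELF relocation accessors removed above follow a simple rule worth keeping in mind when reading the replacement implementation: SHT_RELA entries carry an explicit r_addend, SHT_REL entries leave the addend implicit in the section data (still unread here, per the TODO), and R_X86_64_PC32 values are rendered as "sym+addend-P". A minimal standalone sketch of those two pieces, using simplified stand-in types rather than the real Elf_Shdr/Elf_Rela, might look like:

    #include <cstdint>
    #include <string>

    // Simplified stand-ins; not the ELFObjectFile types.
    enum SectionKind { SecREL, SecRELA };
    struct RelaEntry { int64_t r_addend; };

    // SHT_RELA: explicit addend; SHT_REL: implicit addend (0 until the
    // section data is decoded).
    int64_t relocationAddend(SectionKind K, const RelaEntry *RA) {
      return K == SecRELA ? RA->r_addend : 0;
    }

    // Mirrors the "sym+addend-P" string built for R_X86_64_PC32 above.
    std::string pc32ValueString(const std::string &Sym, int64_t Addend) {
      return Sym + (Addend < 0 ? "" : "+") + std::to_string(Addend) + "-P";
    }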
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
new file mode 100644
index 000000000000..69610f991fdc
--- /dev/null
+++ b/lib/Object/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Object/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Object
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 9cdac8681ddd..b7e5cdcd6b84 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -10,11 +10,12 @@
#include "llvm/Object/MachOObject.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/SwapByteOrder.h"
using namespace llvm;
using namespace llvm::object;
@@ -359,25 +360,13 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
void MachOObject::ReadULEB128s(uint64_t Index,
SmallVectorImpl<uint64_t> &Out) const {
- const char *ptr = Buffer->getBufferStart() + Index;
+ DataExtractor extractor(Buffer->getBuffer(), true, 0);
+
+ uint32_t offset = Index;
uint64_t data = 0;
- uint64_t delta = 0;
- uint32_t shift = 0;
- while (true) {
- assert(ptr < Buffer->getBufferEnd() && "index out of bounds");
- assert(shift < 64 && "too big for uint64_t");
-
- uint8_t byte = *ptr++;
- delta |= ((byte & 0x7F) << shift);
- shift += 7;
- if (byte < 0x80) {
- if (delta == 0)
- break;
- data += delta;
- Out.push_back(data);
- delta = 0;
- shift = 0;
- }
+ while (uint64_t delta = extractor.getULEB128(&offset)) {
+ data += delta;
+ Out.push_back(data);
}
}
@@ -393,7 +382,7 @@ void MachOObject::printHeader(raw_ostream &O) const {
O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
O << "('flag', " << Header.Flags << ")\n";
-
+
// Print extended header if 64-bit.
if (is64Bit())
O << "('reserved', " << Header64Ext.Reserved << ")\n";
@@ -403,6 +392,6 @@ void MachOObject::print(raw_ostream &O) const {
O << "Header:\n";
printHeader(O);
O << "Load Commands:\n";
-
+
O << "Buffer:\n";
}
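The ReadULEB128s rewrite above swaps a hand-rolled decode loop for DataExtractor::getULEB128 and, as before, stops at the first zero delta. For reference, a minimal standalone ULEB128 decoder (illustrative only; not the DataExtractor implementation) is:

    #include <cstdint>
    #include <cstddef>

    // Decode one unsigned LEB128 value starting at Offset; advances Offset
    // past the encoded bytes.
    uint64_t decodeULEB128(const uint8_t *Data, size_t &Offset) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = Data[Offset++];
        Value |= uint64_t(Byte & 0x7f) << Shift;
        Shift += 7;
      } while (Byte & 0x80);
      return Value;
    }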
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 507df5865eb1..3bcda1700b8c 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
@@ -29,11 +30,10 @@ namespace object {
MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
error_code &ec)
- : ObjectFile(Binary::isMachO, Object, ec),
+ : ObjectFile(Binary::ID_MachO, Object, ec),
MachOObj(MOO),
RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSection(DRI);
uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
while (DRI.d.a < LoadCommandCount) {
@@ -124,23 +124,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI,
- uint64_t &Result) const {
- uint64_t SectionOffset;
- uint8_t SectionIndex;
+error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section64> Section;
+ getSection64(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section> Section;
+ getSection(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
}
- getSectionAddress(Sections[SectionIndex-1], SectionOffset);
- Result -= SectionOffset;
return object_error::success;
}
@@ -161,7 +165,74 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- Result = UnknownAddressOrSize;
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ uint64_t BeginOffset;
+ uint64_t EndOffset = 0;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+    // Unfortunately symbols are unsorted, so we need to touch all
+    // symbols from the load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbol64TableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+    // Unfortunately symbols are unsorted, so we need to touch all
+    // symbols from the load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbolTableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ }
+ if (!EndOffset) {
+ uint64_t Size;
+ getSectionSize(Sections[SectionIndex-1], Size);
+ getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ EndOffset += Size;
+ }
+ Result = EndOffset - BeginOffset;
return object_error::success;
}
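getSymbolSize above has no end marker to read directly, so it scans the (unsorted) symbol table for the smallest value in the same section that is strictly greater than the symbol's own value, falling back to the end of the containing section when none exists. The same scan over plain arrays, with hypothetical types standing in for the Mach-O structures:

    #include <cstdint>
    #include <cstddef>
    #include <vector>

    struct Sym { uint8_t Section; uint64_t Value; };

    // End of the range owned by Syms[I]: the next higher symbol value in the
    // same section, or SectionEnd if there is none. Size = end - Syms[I].Value.
    uint64_t symbolEnd(const std::vector<Sym> &Syms, size_t I,
                       uint64_t SectionEnd) {
      uint64_t Begin = Syms[I].Value, End = 0;
      for (size_t J = 0, E = Syms.size(); J != E; ++J) {
        const Sym &S = Syms[J];
        if (S.Section == Syms[I].Section && S.Value > Begin)
          if (!End || S.Value < End)
            End = S.Value;
      }
      return End ? End : SectionEnd;
    }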
@@ -200,36 +271,69 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
- bool &Result) const {
+error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
+ uint32_t &Result) const {
+ uint16_t MachOFlags;
+ uint8_t MachOType;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
- Result = Entry->Flags & macho::STF_StabsEntryMask;
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
- Result = Entry->Flags & macho::STF_StabsEntryMask;
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
}
+
+ // TODO: Correctly set SF_ThreadLocal
+ Result = SymbolRef::SF_None;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Undefined;
+
+ if (MachOFlags & macho::STF_StabsEntryMask)
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (MachOType & MachO::NlistMaskExternal) {
+ Result |= SymbolRef::SF_Global;
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Common;
+ }
+
+ if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
+ Result |= SymbolRef::SF_Weak;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute)
+ Result |= SymbolRef::SF_Absolute;
+
return object_error::success;
}
-error_code MachOObjectFile::isSymbolGlobal(DataRefImpl Symb, bool &Res) const {
-
+error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
+ uint8_t index;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(Symb, Entry);
- Res = Entry->Type & MachO::NlistMaskExternal;
+ index = Entry->SectionIndex;
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(Symb, Entry);
- Res = Entry->Type & MachO::NlistMaskExternal;
+ index = Entry->SectionIndex;
}
+
+ if (index == 0)
+ Res = end_sections();
+ else
+ Res = section_iterator(SectionRef(Sections[index-1], this));
+
return object_error::success;
}
error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Res) const {
+ SymbolRef::Type &Res) const {
uint8_t n_type;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
@@ -243,12 +347,14 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
Res = SymbolRef::ST_Other;
// If this is a STAB debugging symbol, we can do nothing more.
- if (n_type & MachO::NlistMaskStab)
+ if (n_type & MachO::NlistMaskStab) {
+ Res = SymbolRef::ST_Debug;
return object_error::success;
+ }
switch (n_type & MachO::NlistMaskType) {
case MachO::NListTypeUndefined :
- Res = SymbolRef::ST_External;
+ Res = SymbolRef::ST_Unknown;
break;
case MachO::NListTypeSection :
Res = SymbolRef::ST_Function;
@@ -261,7 +367,6 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
symbol_iterator MachOObjectFile::begin_symbols() const {
// DRI.d.a = segment number; DRI.d.b = symbol index.
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSymbol(DRI);
return symbol_iterator(SymbolRef(DRI, this));
}
@@ -269,10 +374,33 @@ symbol_iterator MachOObjectFile::begin_symbols() const {
symbol_iterator MachOObjectFile::end_symbols() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- DRI.d.b = 0;
return symbol_iterator(SymbolRef(DRI, this));
}
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+StringRef MachOObjectFile::getLoadName() const {
+ // TODO: Implement
+ report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
/*===-- Sections ----------------------------------------------------------===*/
@@ -451,12 +579,43 @@ error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
return object_error::success;
}
+error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = true;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = (Sect->Flags & MachO::SectionTypeZeroFill ||
+ Sect->Flags & MachO::SectionTypeZeroFillLarge);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = (Sect->Flags & MachO::SectionTypeZeroFill ||
+ Sect->Flags & MachO::SectionTypeZeroFillLarge);
+ }
+
+ return object_error::success;
+}
+
error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
- SymbolRef::SymbolType ST;
+ SymbolRef::Type ST;
getSymbolType(Symb, ST);
- if (ST == SymbolRef::ST_External) {
+ if (ST == SymbolRef::ST_Unknown) {
Result = false;
return object_error::success;
}
@@ -483,7 +642,6 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
DataRefImpl ret;
- ret.d.a = 0;
ret.d.b = getSectionIndex(Sec);
return relocation_iterator(RelocationRef(ret, this));
}
@@ -506,7 +664,6 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
section_iterator MachOObjectFile::begin_sections() const {
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSection(DRI);
return section_iterator(SectionRef(DRI, this));
}
@@ -514,7 +671,6 @@ section_iterator MachOObjectFile::begin_sections() const {
section_iterator MachOObjectFile::end_sections() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- DRI.d.b = 0;
return section_iterator(SectionRef(DRI, this));
}
@@ -543,19 +699,43 @@ error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
}
error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const {
- const uint8_t* sectAddress = base();
+ const uint8_t* sectAddress = 0;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Section64> Sect;
getSection64(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
+ sectAddress += Sect->Address;
} else {
InMemoryStruct<macho::Section> Sect;
getSection(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
+ sectAddress += Sect->Address;
}
InMemoryStruct<macho::RelocationEntry> RE;
getRelocation(Rel, RE);
- Res = reinterpret_cast<uintptr_t>(sectAddress + RE->Word0);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ uint64_t RelAddr = 0;
+ if (isScattered)
+ RelAddr = RE->Word0 & 0xFFFFFF;
+ else
+ RelAddr = RE->Word0;
+
+ Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ if (isScattered)
+ Res = RE->Word0 & 0xFFFFFF;
+ else
+ Res = RE->Word0;
return object_error::success;
}
error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
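Several of the new accessors repeat one decode step: on every architecture except x86_64, a set high bit in Word0 (macho::RF_Scattered) marks a scattered entry whose address sits in the low 24 bits of Word0 and whose type and pcrel bits also move into Word0; otherwise the address is all of Word0 and the type/pcrel bits live in Word1. A compact sketch of that field extraction over plain integers (the 0x80000000 mask is an assumption mirroring macho::RF_Scattered):

    #include <cstdint>

    static const uint32_t ScatteredMask = 0x80000000; // assumed RF_Scattered value

    struct RelocFields { bool Scattered; uint64_t Offset; unsigned Type; bool PCRel; };

    RelocFields decodeReloc(uint32_t Word0, uint32_t Word1, bool IsX86_64) {
      RelocFields F;
      F.Scattered = !IsX86_64 && (Word0 & ScatteredMask);
      if (F.Scattered) {
        F.Offset = Word0 & 0xFFFFFF;       // low 24 bits hold the address
        F.Type   = (Word0 >> 24) & 0xF;
        F.PCRel  = (Word0 >> 30) & 1;
      } else {
        F.Offset = Word0;
        F.Type   = (Word1 >> 28) & 0xF;
        F.PCRel  = (Word1 >> 24) & 1;
      }
      return F;
    }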
@@ -566,7 +746,6 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
bool isExtern = (RE->Word1 >> 27) & 1;
DataRefImpl Sym;
- Sym.d.a = Sym.d.b = 0;
moveToNextSymbol(Sym);
if (isExtern) {
for (unsigned i = 0; i < SymbolIdx; i++) {
@@ -580,14 +759,112 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
return object_error::success;
}
error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const {
+ uint64_t &Res) const {
InMemoryStruct<macho::RelocationEntry> RE;
getRelocation(Rel, RE);
- Res = RE->Word1;
+ Res = RE->Word0;
+ Res <<= 32;
+ Res |= RE->Word1;
return object_error::success;
}
error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
+ // TODO: Support scattered relocations.
+ StringRef res;
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ unsigned r_type;
+ if (isScattered)
+ r_type = (RE->Word0 >> 24) & 0xF;
+ else
+ r_type = (RE->Word1 >> 28) & 0xF;
+
+ switch (Arch) {
+ case Triple::x86: {
+ const char* Table[] = {
+ "GENERIC_RELOC_VANILLA",
+ "GENERIC_RELOC_PAIR",
+ "GENERIC_RELOC_SECTDIFF",
+ "GENERIC_RELOC_PB_LA_PTR",
+ "GENERIC_RELOC_LOCAL_SECTDIFF",
+ "GENERIC_RELOC_TLV" };
+
+ if (r_type > 6)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::x86_64: {
+ const char* Table[] = {
+ "X86_64_RELOC_UNSIGNED",
+ "X86_64_RELOC_SIGNED",
+ "X86_64_RELOC_BRANCH",
+ "X86_64_RELOC_GOT_LOAD",
+ "X86_64_RELOC_GOT",
+ "X86_64_RELOC_SUBTRACTOR",
+ "X86_64_RELOC_SIGNED_1",
+ "X86_64_RELOC_SIGNED_2",
+ "X86_64_RELOC_SIGNED_4",
+ "X86_64_RELOC_TLV" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::arm: {
+ const char* Table[] = {
+ "ARM_RELOC_VANILLA",
+ "ARM_RELOC_PAIR",
+ "ARM_RELOC_SECTDIFF",
+ "ARM_RELOC_LOCAL_SECTDIFF",
+ "ARM_RELOC_PB_LA_PTR",
+ "ARM_RELOC_BR24",
+ "ARM_THUMB_RELOC_BR22",
+ "ARM_THUMB_32BIT_BRANCH",
+ "ARM_RELOC_HALF",
+ "ARM_RELOC_HALF_SECTDIFF" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::ppc: {
+ const char* Table[] = {
+ "PPC_RELOC_VANILLA",
+ "PPC_RELOC_PAIR",
+ "PPC_RELOC_BR14",
+ "PPC_RELOC_BR24",
+ "PPC_RELOC_HI16",
+ "PPC_RELOC_LO16",
+ "PPC_RELOC_HA16",
+ "PPC_RELOC_LO14",
+ "PPC_RELOC_SECTDIFF",
+ "PPC_RELOC_PB_LA_PTR",
+ "PPC_RELOC_HI16_SECTDIFF",
+ "PPC_RELOC_LO16_SECTDIFF",
+ "PPC_RELOC_HA16_SECTDIFF",
+ "PPC_RELOC_JBSR",
+ "PPC_RELOC_LO14_SECTDIFF",
+ "PPC_RELOC_LOCAL_SECTDIFF" };
+
+ res = Table[r_type];
+ break;
+ }
+ case Triple::UnknownArch:
+ res = "Unknown";
+ break;
+ }
+ Result.append(res.begin(), res.end());
return object_error::success;
}
error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
@@ -611,11 +888,356 @@ error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
}
return object_error::success;
}
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+error_code advance(T &it, size_t Val) {
+ error_code ec;
+ while (Val--) {
+ it.increment(ec);
+ }
+ return ec;
+}
+
+template<class T>
+void advanceTo(T &it, size_t Val) {
+ if (error_code ec = advance(it, Val))
+ report_fatal_error(ec.message());
+}
+
+void MachOObjectFile::printRelocationTargetName(
+ InMemoryStruct<macho::RelocationEntry>& RE,
+ raw_string_ostream &fmt) const {
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+  // The target of a scattered relocation is an address. In the interest of
+ // generating pretty output, scan through the symbol table looking for a
+ // symbol that aligns with that address. If we find one, print it.
+ // Otherwise, we just print the hex address of the target.
+ if (isScattered) {
+ uint32_t Val = RE->Word1;
+
+ error_code ec;
+ for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ fmt << format("0x%x", Val);
+ return;
+ }
+
+ StringRef S;
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ uint32_t Val = RE->Word1 & 0xFFFFFF;
+
+ if (isExtern) {
+ symbol_iterator SI = begin_symbols();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ } else {
+ section_iterator SI = begin_sections();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ }
+
+ fmt << S;
+}
+
error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ bool isPCRel;
+ if (isScattered)
+ isPCRel = ((RE->Word0 >> 30) & 1);
+ else
+ isPCRel = ((RE->Word1 >> 24) & 1);
+
+ // Determine any addends that should be displayed with the relocation.
+ // These require decoding the relocation type, which is triple-specific.
+
+ // X86_64 has entirely custom relocation types.
+ if (Arch == Triple::x86_64) {
+ bool isPCRel = ((RE->Word1 >> 24) & 1);
+
+ switch (Type) {
+ case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD
+ case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT
+ printRelocationTargetName(RE, fmt);
+ fmt << "@GOT";
+ if (isPCRel) fmt << "PCREL";
+ break;
+ }
+ case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // X86_64_SUBTRACTOR must be followed by a relocation of type
+ // X86_64_RELOC_UNSIGNED.
+ // NOTE: Scattered relocations don't exist on x86_64.
+ unsigned RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 0)
+ report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
+
+      // The X86_64_RELOC_UNSIGNED contains the minuend symbol; the
+      // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
+ printRelocationTargetName(RENext, fmt);
+ fmt << "-";
+ printRelocationTargetName(RE, fmt);
+ }
+ case macho::RIT_X86_64_TLV:
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
+ printRelocationTargetName(RE, fmt);
+ fmt << "-1";
+ break;
+ case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
+ printRelocationTargetName(RE, fmt);
+ fmt << "-2";
+ break;
+ case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
+ printRelocationTargetName(RE, fmt);
+ fmt << "-4";
+ break;
+ default:
+ printRelocationTargetName(RE, fmt);
+ break;
+ }
+ // X86 and ARM share some relocation types in common.
+ } else if (Arch == Triple::x86 || Arch == Triple::arm) {
+ // Generic relocation types...
+ switch (Type) {
+ case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
+ return object_error::success;
+ case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+      // X86 sect diffs must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ }
+
+ if (Arch == Triple::x86) {
+ // All X86 relocations that need special printing were already
+ // handled in the generic code.
+ switch (Type) {
+ case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+        // X86 sect diffs must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ case macho::RIT_Generic_TLV: {
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ }
+ default:
+ printRelocationTargetName(RE, fmt);
+ }
+ } else { // ARM-specific relocations
+ switch (Type) {
+ case macho::RIT_ARM_Half: // ARM_RELOC_HALF
+ case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
+ // Half relocations steal a bit from the length field to encode
+ // whether this is an upper16 or a lower16 relocation.
+ bool isUpper;
+ if (isScattered)
+ isUpper = (RE->Word0 >> 28) & 1;
+ else
+ isUpper = (RE->Word1 >> 25) & 1;
+
+ if (isUpper)
+ fmt << ":upper16:(";
+ else
+ fmt << ":lower16:(";
+ printRelocationTargetName(RE, fmt);
+
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // ARM half relocs must be followed by a relocation of type
+ // ARM_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+
+ if (RType != 1)
+ report_fatal_error("Expected ARM_RELOC_PAIR after "
+ "GENERIC_RELOC_HALF");
+
+ // NOTE: The half of the target virtual address is stashed in the
+ // address field of the secondary relocation, but we can't reverse
+ // engineer the constant offset from it without decoding the movw/movt
+ // instruction to find the other half in its immediate field.
+
+ // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+ // symbol/section pointer of the follow-on relocation.
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ }
+
+ fmt << ")";
+ break;
+ }
+ default: {
+ printRelocationTargetName(RE, fmt);
+ }
+ }
+ }
+ } else
+ printRelocationTargetName(RE, fmt);
+
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
+ bool &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ Result = false;
+
+ // On arches that use the generic relocations, GENERIC_RELOC_PAIR
+ // is always hidden.
+ if (Arch == Triple::x86 || Arch == Triple::arm) {
+ if (Type == macho::RIT_Pair) Result = true;
+ } else if (Arch == Triple::x86_64) {
+ // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
+  // an X86_64_RELOC_SUBTRACTOR.
+ if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
+ DataRefImpl RelPrev = Rel;
+ RelPrev.d.a--;
+ InMemoryStruct<macho::RelocationEntry> REPrev;
+ getRelocation(RelPrev, REPrev);
+
+ unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
+
+ if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+ }
+ }
+
return object_error::success;
}
+error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+
/*===-- Miscellaneous -----------------------------------------------------===*/
uint8_t MachOObjectFile::getBytesInAddress() const {
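getRelocationHidden above exists so paired relocation entries can be folded out of listings: GENERIC_RELOC_PAIR is never shown on x86/ARM, and on x86_64 an X86_64_RELOC_UNSIGNED is suppressed when it immediately follows an X86_64_RELOC_SUBTRACTOR. The same rule over a flat list of type codes, using the enumerator values implied by the name tables earlier in this file (a sketch, not the MachOObjectFile code):

    #include <cstddef>
    #include <vector>

    // Values taken from the relocation-name tables above.
    enum { GenericPair = 1, X8664Unsigned = 0, X8664Subtractor = 5 };

    bool isHiddenReloc(const std::vector<unsigned> &Types, size_t I,
                       bool IsX86_64) {
      if (!IsX86_64)
        return Types[I] == GenericPair;           // PAIR entries are never shown
      return Types[I] == X8664Unsigned && I > 0 &&
             Types[I - 1] == X8664Subtractor;     // UNSIGNED following SUBTRACTOR
    }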
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 2ea8db978670..f061ea7cebed 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -18,6 +18,7 @@
using namespace llvm;
using namespace object;
+// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
}
@@ -26,6 +27,7 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
delete unwrap(ObjectFile);
}
+// ObjectFile Section iterators
LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
section_iterator SI = unwrap(ObjectFile)->begin_sections();
return wrap(new section_iterator(SI));
@@ -46,6 +48,34 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
}
+void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
+ LLVMSymbolIteratorRef Sym) {
+ if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
+ report_fatal_error(ec.message());
+}
+
+// ObjectFile Symbol iterators
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) {
+ symbol_iterator SI = unwrap(ObjectFile)->begin_symbols();
+ return wrap(new symbol_iterator(SI));
+}
+
+void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSymbolIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_symbols()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSymbol failed: " + ec.message());
+}
+
+// SectionRef accessors
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
StringRef ret;
if (error_code ec = (*unwrap(SI))->getName(ret))
@@ -66,3 +96,123 @@ const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
report_fatal_error(ec.message());
return ret.data();
}
+
+uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
+ LLVMSymbolIteratorRef Sym) {
+ bool ret;
+ if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// Section Relocation iterators
+LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) {
+ relocation_iterator SI = (*unwrap(Section))->begin_relocations();
+ return wrap(new relocation_iterator(SI));
+}
+
+void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section,
+ LLVMRelocationIteratorRef SI) {
+ return (*unwrap(SI) == (*unwrap(Section))->end_relocations()) ? 1 : 0;
+}
+
+void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextRelocation failed: " +
+ ec.message());
+}
+
+
+// SymbolRef accessors
+const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// RelocationRef accessors
+uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
+ SymbolRef ret;
+ if (error_code ec = (*unwrap(RI))->getSymbol(ret))
+ report_fatal_error(ec.message());
+
+ return wrap(new symbol_iterator(ret));
+}
+
+uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getType(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getTypeName(ret))
+ report_fatal_error(ec.message());
+
+ char *str = static_cast<char*>(malloc(ret.size()));
+ std::copy(ret.begin(), ret.end(), str);
+ return str;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getValueString(ret))
+ report_fatal_error(ec.message());
+
+ char *str = static_cast<char*>(malloc(ret.size()));
+ std::copy(ret.begin(), ret.end(), str);
+ return str;
+}
+
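The new C bindings all follow the same iterator protocol: create the iterator from its parent, test it against the end, advance it, and dispose of it. A short usage sketch over the functions above (error handling elided; LLVMIsSectionIteratorAtEnd and LLVMDisposeSectionIterator are assumed from the existing llvm-c/Object.h header, and strings returned by LLVMGetRelocationTypeName/ValueString must be freed by the caller, per the NOTEs above):

    #include "llvm-c/Object.h"
    #include <stdio.h>

    void listSections(LLVMMemoryBufferRef Buf) {
      LLVMObjectFileRef OF = LLVMCreateObjectFile(Buf);
      LLVMSectionIteratorRef SI = LLVMGetSections(OF);
      while (!LLVMIsSectionIteratorAtEnd(OF, SI)) {
        printf("%s @ %llx\n", LLVMGetSectionName(SI),
               (unsigned long long)LLVMGetSectionAddress(SI));
        LLVMMoveToNextSection(SI);
      }
      LLVMDisposeSectionIterator(SI);
      LLVMDisposeObjectFile(OF);
    }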
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 69d8ed0e5e9a..b14df9af64f4 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -21,6 +21,8 @@
using namespace llvm;
using namespace object;
+void ObjectFile::anchor() { }
+
ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
: Binary(Type, source) {
}
@@ -56,7 +58,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
OwningPtr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+ if (MemoryBuffer::getFile(ObjectPath, File))
return NULL;
return createObjectFile(File.take());
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f2388944929b..409d4fbd0ae5 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -14,8 +14,9 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <limits.h>
@@ -1150,9 +1151,6 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
assert(lost_fraction != lfExactlyZero);
switch (rounding_mode) {
- default:
- llvm_unreachable(0);
-
case rmNearestTiesToAway:
return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
@@ -1175,6 +1173,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
case rmTowardNegative:
return sign == true;
}
+ llvm_unreachable("Invalid rounding mode found");
}
APFloat::opStatus
@@ -1854,20 +1853,33 @@ APFloat::convert(const fltSemantics &toSemantics,
lostFraction lostFraction;
unsigned int newPartCount, oldPartCount;
opStatus fs;
+ int shift;
+ const fltSemantics &fromSemantics = *semantics;
- assertArithmeticOK(*semantics);
+ assertArithmeticOK(fromSemantics);
assertArithmeticOK(toSemantics);
lostFraction = lfExactlyZero;
newPartCount = partCountForBits(toSemantics.precision + 1);
oldPartCount = partCount();
+ shift = toSemantics.precision - fromSemantics.precision;
- /* Handle storage complications. If our new form is wider,
- re-allocate our bit pattern into wider storage. If it is
- narrower, we ignore the excess parts, but if narrowing to a
- single part we need to free the old storage.
- Be careful not to reference significandParts for zeroes
- and infinities, since it aborts. */
+ bool X86SpecialNan = false;
+ if (&fromSemantics == &APFloat::x87DoubleExtended &&
+ &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL))) {
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ X86SpecialNan = true;
+ }
+
+ // If this is a truncation, perform the shift before we narrow the storage.
+ if (shift < 0 && (category==fcNormal || category==fcNaN))
+ lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
+
+  // Fix the storage so it can hold the new value.
if (newPartCount > oldPartCount) {
+ // The new type requires more storage; make it available.
integerPart *newParts;
newParts = new integerPart[newPartCount];
APInt::tcSet(newParts, 0, newPartCount);
@@ -1875,61 +1887,36 @@ APFloat::convert(const fltSemantics &toSemantics,
APInt::tcAssign(newParts, significandParts(), oldPartCount);
freeSignificand();
significand.parts = newParts;
- } else if (newPartCount < oldPartCount) {
- /* Capture any lost fraction through truncation of parts so we get
- correct rounding whilst normalizing. */
- if (category==fcNormal)
- lostFraction = lostFractionThroughTruncation
- (significandParts(), oldPartCount, toSemantics.precision);
- if (newPartCount == 1) {
- integerPart newPart = 0;
- if (category==fcNormal || category==fcNaN)
- newPart = significandParts()[0];
- freeSignificand();
- significand.part = newPart;
- }
+ } else if (newPartCount == 1 && oldPartCount != 1) {
+ // Switch to built-in storage for a single part.
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
}
+ // Now that we have the right storage, switch the semantics.
+ semantics = &toSemantics;
+
+ // If this is an extension, perform the shift now that the storage is
+ // available.
+ if (shift > 0 && (category==fcNormal || category==fcNaN))
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+
if (category == fcNormal) {
- /* Re-interpret our bit-pattern. */
- exponent += toSemantics.precision - semantics->precision;
- semantics = &toSemantics;
fs = normalize(rounding_mode, lostFraction);
*losesInfo = (fs != opOK);
} else if (category == fcNaN) {
- int shift = toSemantics.precision - semantics->precision;
- // Do this now so significandParts gets the right answer
- const fltSemantics *oldSemantics = semantics;
- semantics = &toSemantics;
- *losesInfo = false;
- // No normalization here, just truncate
- if (shift>0)
- APInt::tcShiftLeft(significandParts(), newPartCount, shift);
- else if (shift < 0) {
- unsigned ushift = -shift;
- // Figure out if we are losing information. This happens
- // if are shifting out something other than 0s, or if the x87 long
- // double input did not have its integer bit set (pseudo-NaN), or if the
- // x87 long double input did not have its QNan bit set (because the x87
- // hardware sets this bit when converting a lower-precision NaN to
- // x87 long double).
- if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
- *losesInfo = true;
- if (oldSemantics == &APFloat::x87DoubleExtended &&
- (!(*significandParts() & 0x8000000000000000ULL) ||
- !(*significandParts() & 0x4000000000000000ULL)))
- *losesInfo = true;
- APInt::tcShiftRight(significandParts(), newPartCount, ushift);
- }
+ *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
// does not give you back the same bits. This is dubious, and we
// don't currently do it. You're really supposed to get
// an invalid operation signal at runtime, but nobody does that.
fs = opOK;
} else {
- semantics = &toSemantics;
- fs = opOK;
*losesInfo = false;
+ fs = opOK;
}
return fs;
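The reordering in convert() is what makes truncation safe: shift = toPrecision - fromPrecision, so narrowing (shift < 0) shifts the significand right while the wider storage is still allocated, and widening (shift > 0) shifts left only after the new storage exists. A toy sketch of a lossy right shift on a single 64-bit word (illustrative; the real code works on multi-part significands and lostFraction categories):

    #include <cstdint>
    #include <utility>

    // Logical right shift that also reports whether any set bits were lost.
    std::pair<uint64_t, bool> shiftRightLossy(uint64_t Sig, unsigned Shift) {
      if (Shift == 0) return std::make_pair(Sig, false);
      if (Shift >= 64) return std::make_pair(uint64_t(0), Sig != 0);
      bool Lost = (Sig & ((uint64_t(1) << Shift) - 1)) != 0;
      return std::make_pair(Sig >> Shift, Lost);
    }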
@@ -2695,21 +2682,19 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
return writeSignedDecimal (dst, exponent);
}
-// For good performance it is desirable for different APFloats
-// to produce different integers.
-uint32_t
-APFloat::getHashValue() const
-{
- if (category==fcZero) return sign<<8 | semantics->precision ;
- else if (category==fcInfinity) return sign<<9 | semantics->precision;
- else if (category==fcNaN) return 1<<10 | semantics->precision;
- else {
- uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
- const integerPart* p = significandParts();
- for (int i=partCount(); i>0; i--, p++)
- hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
- return hash;
- }
+hash_code llvm::hash_value(const APFloat &Arg) {
+ if (Arg.category != APFloat::fcNormal)
+ return hash_combine((uint8_t)Arg.category,
+ // NaN has no sign, fix it at zero.
+ Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
+ Arg.semantics->precision);
+
+ // Normal floats need their exponent and significand hashed.
+ return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
+ Arg.semantics->precision, Arg.exponent,
+ hash_combine_range(
+ Arg.significandParts(),
+ Arg.significandParts() + Arg.partCount()));
}
// Conversion from APFloat to/from host float/double. It may eventually be
@@ -3354,7 +3339,7 @@ namespace {
// Rounding down is just a truncation, except we also want to drop
// trailing zeros from the new result.
if (buffer[FirstSignificant - 1] < '5') {
- while (buffer[FirstSignificant] == '0')
+ while (FirstSignificant < N && buffer[FirstSignificant] == '0')
FirstSignificant++;
exp += FirstSignificant;
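Both this APFloat change and the APInt change below replace bespoke hash mixers with the generic llvm::hash_combine facilities. The pattern is easy to reuse for any small value type; a minimal illustration (not the APFloat code itself):

    #include "llvm/ADT/Hashing.h"
    #include <stdint.h>
    #include <vector>

    struct Sample {
      uint8_t Category;
      uint8_t Sign;
      std::vector<uint64_t> Words; // significand-like payload
    };

    llvm::hash_code hash_value(const Sample &S) {
      // Combine the scalar fields with a range hash over the payload, the same
      // shape as the new hash_value(const APFloat &) / hash_value(const APInt &).
      return llvm::hash_combine(S.Category, S.Sign,
                                llvm::hash_combine_range(S.Words.begin(),
                                                         S.Words.end()));
    }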
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 3774c5223c46..9b81fe776a61 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -14,9 +14,10 @@
#define DEBUG_TYPE "apint"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -456,16 +457,6 @@ APInt APInt::XorSlowCase(const APInt& RHS) const {
return APInt(val, getBitWidth()).clearUnusedBits();
}
-bool APInt::operator !() const {
- if (isSingleWord())
- return !VAL;
-
- for (unsigned i = 0; i < getNumWords(); ++i)
- if (pVal[i])
- return false;
- return true;
-}
-
APInt APInt::operator*(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
@@ -493,12 +484,6 @@ APInt APInt::operator-(const APInt& RHS) const {
return Result.clearUnusedBits();
}
-bool APInt::operator[](unsigned bitPosition) const {
- assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
- return (maskBit(bitPosition) &
- (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
-}
-
bool APInt::EqualSlowCase(const APInt& RHS) const {
// Get some facts about the number of bits used in the two operands.
unsigned n1 = getActiveBits();
@@ -675,93 +660,11 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
}
}
-// From http://www.burtleburtle.net, by Bob Jenkins.
-// When targeting x86, both GCC and LLVM seem to recognize this as a
-// rotate instruction.
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define mix(a,b,c) \
- { \
- a -= c; a ^= rot(c, 4); c += b; \
- b -= a; b ^= rot(a, 6); a += c; \
- c -= b; c ^= rot(b, 8); b += a; \
- a -= c; a ^= rot(c,16); c += b; \
- b -= a; b ^= rot(a,19); a += c; \
- c -= b; c ^= rot(b, 4); b += a; \
- }
-
-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define final(a,b,c) \
- { \
- c ^= b; c -= rot(b,14); \
- a ^= c; a -= rot(c,11); \
- b ^= a; b -= rot(a,25); \
- c ^= b; c -= rot(b,16); \
- a ^= c; a -= rot(c,4); \
- b ^= a; b -= rot(a,14); \
- c ^= b; c -= rot(b,24); \
- }
-
-// hashword() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins. k is a pointer to an array of uint32_t values; length is
-// the length of the key, in 32-bit chunks. This version only handles
-// keys that are a multiple of 32 bits in size.
-static inline uint32_t hashword(const uint64_t *k64, size_t length)
-{
- const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
- uint32_t a,b,c;
-
- /* Set up the internal state */
- a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
-
- /*------------------------------------------------- handle most of the key */
- while (length > 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- mix(a,b,c);
- length -= 3;
- k += 3;
- }
-
- /*------------------------------------------- handle the last 3 uint32_t's */
- switch (length) { /* all the case statements fall through */
- case 3 : c+=k[2];
- case 2 : b+=k[1];
- case 1 : a+=k[0];
- final(a,b,c);
- case 0: /* case 0: nothing left to add */
- break;
- }
- /*------------------------------------------------------ report the result */
- return c;
-}
-
-// hashword8() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins. This computes a 32-bit hash from one 64-bit word. When
-// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
-// function into about 35 instructions when inlined.
-static inline uint32_t hashword8(const uint64_t k64)
-{
- uint32_t a,b,c;
- a = b = c = 0xdeadbeef + 4;
- b += k64 >> 32;
- a += k64 & 0xffffffff;
- final(a,b,c);
- return c;
-}
-#undef final
-#undef mix
-#undef rot
+hash_code llvm::hash_value(const APInt &Arg) {
+ if (Arg.isSingleWord())
+ return hash_combine(Arg.VAL);
-uint64_t APInt::getHashValue() const {
- uint64_t hash;
- if (isSingleWord())
- hash = hashword8(VAL);
- else
- hash = hashword(pVal, getNumWords()*2);
- return hash;
+ return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
}
/// HiBits - This function returns the high "numBits" bits of this APInt.
@@ -803,20 +706,9 @@ unsigned APInt::countLeadingZerosSlowCase() const {
return Count;
}
-static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
- unsigned Count = 0;
- if (skip)
- V <<= skip;
- while (V && (V & (1ULL << 63))) {
- Count++;
- V <<= 1;
- }
- return Count;
-}
-
unsigned APInt::countLeadingOnes() const {
if (isSingleWord())
- return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+ return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
unsigned shift;
@@ -827,13 +719,13 @@ unsigned APInt::countLeadingOnes() const {
shift = APINT_BITS_PER_WORD - highWordBits;
}
int i = getNumWords() - 1;
- unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
if (pVal[i] == -1ULL)
Count += APINT_BITS_PER_WORD;
else {
- Count += countLeadingOnes_64(pVal[i], 0);
+ Count += CountLeadingOnes_64(pVal[i]);
break;
}
}
@@ -870,30 +762,43 @@ unsigned APInt::countPopulationSlowCase() const {
return Count;
}
+/// Perform a logical right-shift from Src to Dst, which must be equal or
+/// non-overlapping, of Words words, by Shift, which must be less than 64.
+static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
+ unsigned Shift) {
+ uint64_t Carry = 0;
+ for (int I = Words - 1; I >= 0; --I) {
+ uint64_t Tmp = Src[I];
+ Dst[I] = (Tmp >> Shift) | Carry;
+ Carry = Tmp << (64 - Shift);
+ }
+}
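The helper above moves bits down one 64-bit word at a time, carrying the low Shift bits of each word into the word below it. Below is a standalone sketch of the same carry propagation, not part of the imported patch and using made-up names, that can be compiled on its own to check the bit movement by hand:

#include <cstdint>
#include <cstdio>

// Shift a little-endian array of 64-bit words right by Shift (0 < Shift < 64),
// carrying the low bits of each higher word into the word below it.
static void demoLshrNear(uint64_t *Dst, const uint64_t *Src, unsigned Words,
                         unsigned Shift) {
  uint64_t Carry = 0;
  for (int I = Words - 1; I >= 0; --I) {
    uint64_t Tmp = Src[I];
    Dst[I] = (Tmp >> Shift) | Carry;
    Carry = Tmp << (64 - Shift); // bits that move down into the next word
  }
}

int main() {
  uint64_t V[2] = {0x0, 0x1};   // the 128-bit value 1 << 64, low word first
  demoLshrNear(V, V, 2, 4);     // shift right by 4
  std::printf("%llx %llx\n", (unsigned long long)V[1],
              (unsigned long long)V[0]); // prints "0 1000000000000000"
  return 0;
}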
+
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
- else if (BitWidth == 32)
+ if (BitWidth == 32)
return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
- else if (BitWidth == 48) {
+ if (BitWidth == 48) {
unsigned Tmp1 = unsigned(VAL >> 16);
Tmp1 = ByteSwap_32(Tmp1);
uint16_t Tmp2 = uint16_t(VAL);
Tmp2 = ByteSwap_16(Tmp2);
return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
- } else if (BitWidth == 64)
+ }
+ if (BitWidth == 64)
return APInt(BitWidth, ByteSwap_64(VAL));
- else {
- APInt Result(BitWidth, 0);
- char *pByte = (char*)Result.pVal;
- for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
- char Tmp = pByte[i];
- pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
- pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
- }
- return Result;
+
+ APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
+ for (unsigned I = 0, N = getNumWords(); I != N; ++I)
+ Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
+ if (Result.BitWidth != BitWidth) {
+ lshrNear(Result.pVal, Result.pVal, getNumWords(),
+ Result.BitWidth - BitWidth);
+ Result.BitWidth = BitWidth;
}
+ return Result;
}
APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
@@ -1110,6 +1015,18 @@ APInt APInt::sextOrTrunc(unsigned width) const {
return *this;
}
+APInt APInt::zextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ return *this;
+}
+
+APInt APInt::sextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ return *this;
+}
+
/// Arithmetic right-shift this APInt by shiftAmt.
/// @brief Arithmetic right-shift function.
APInt APInt::ashr(const APInt &shiftAmt) const {
@@ -1209,7 +1126,7 @@ APInt APInt::lshr(const APInt &shiftAmt) const {
/// @brief Logical right-shift function.
APInt APInt::lshr(unsigned shiftAmt) const {
if (isSingleWord()) {
- if (shiftAmt == BitWidth)
+ if (shiftAmt >= BitWidth)
return APInt(BitWidth, 0);
else
return APInt(BitWidth, this->VAL >> shiftAmt);
@@ -1232,11 +1149,7 @@ APInt APInt::lshr(unsigned shiftAmt) const {
// If we are shifting less than a word, compute the shift with a simple carry
if (shiftAmt < APINT_BITS_PER_WORD) {
- uint64_t carry = 0;
- for (int i = getNumWords()-1; i >= 0; --i) {
- val[i] = (pVal[i] >> shiftAmt) | carry;
- carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
- }
+ lshrNear(val, pVal, getNumWords(), shiftAmt);
return APInt(val, BitWidth).clearUnusedBits();
}
@@ -1329,14 +1242,10 @@ APInt APInt::rotl(const APInt &rotateAmt) const {
}
APInt APInt::rotl(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
- // Don't get too fancy, just use existing shift/or facilities
- APInt hi(*this);
- APInt lo(*this);
- hi.shl(rotateAmt);
- lo.lshr(BitWidth - rotateAmt);
- return hi | lo;
+ return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
}
APInt APInt::rotr(const APInt &rotateAmt) const {
@@ -1344,14 +1253,10 @@ APInt APInt::rotr(const APInt &rotateAmt) const {
}
APInt APInt::rotr(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
- // Don't get too fancy, just use existing shift/or facilities
- APInt hi(*this);
- APInt lo(*this);
- lo.lshr(rotateAmt);
- hi.shl(BitWidth - rotateAmt);
- return hi | lo;
+ return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
}
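A quick usage sketch, illustrative only and not part of the patch, of the post-patch rotate semantics: rotation amounts are reduced modulo the bit width, so over-wide rotates no longer feed an out-of-range amount into lshr or shl.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt X(8, 0xB3);                    // 1011'0011
  assert(X.rotl(3) == APInt(8, 0x9D)); // 1001'1101
  assert(X.rotl(8) == X);              // rotating by the full width is the identity
  assert(X.rotl(11) == X.rotl(3));     // amounts are taken modulo the width
  assert(X.rotr(3) == X.rotl(5));      // rotr mirrors rotl
  return 0;
}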
// Square Root - this method computes and returns the square root of "this".
@@ -1431,15 +1336,11 @@ APInt APInt::sqrt() const {
APInt nextSquare((x_old + 1) * (x_old +1));
if (this->ult(square))
return x_old;
- else if (this->ule(nextSquare)) {
- APInt midpoint((nextSquare - square).udiv(two));
- APInt offset(*this - square);
- if (offset.ult(midpoint))
- return x_old;
- else
- return x_old + 1;
- } else
- llvm_unreachable("Error in APInt::sqrt computation");
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
return x_old + 1;
}
@@ -2184,7 +2085,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
bool Signed, bool formatAsCLiteral) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
Radix == 36) &&
- "Radix should be 2, 8, 10, or 16!");
+ "Radix should be 2, 8, 10, 16, or 36!");
const char *Prefix = "";
if (formatAsCLiteral) {
@@ -2197,9 +2098,13 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
case 8:
Prefix = "0";
break;
+ case 10:
+ break; // No prefix
case 16:
Prefix = "0x";
break;
+ default:
+ llvm_unreachable("Invalid radix!");
}
}
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 215b0f249d96..b8978302e746 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -22,8 +22,8 @@ namespace llvm {
BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
SlabAllocator &allocator)
- : SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
- CurSlab(0), BytesAllocated(0) { }
+ : SlabSize(size), SizeThreshold(std::min(size, threshold)),
+ Allocator(allocator), CurSlab(0), BytesAllocated(0) { }
BumpPtrAllocator::~BumpPtrAllocator() {
DeallocateSlabs(CurSlab);
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 94760cc069fc..3001f6c468aa 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Atomic.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
using namespace llvm;
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index a63bf83f2039..84a993e3e5b6 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -70,8 +70,13 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
assert(n <= d && "Probability must be less or equal to 1.");
- // If we can overflow use 96-bit operations.
- if (n > 0 && Frequency > UINT64_MAX / n) {
+ // Calculate Frequency * n.
+ uint64_t mulLo = (Frequency & UINT32_MAX) * n;
+ uint64_t mulHi = (Frequency >> 32) * n;
+ uint64_t mulRes = (mulHi << 32) + mulLo;
+
+ // If there was overflow use 96-bit operations.
+ if (mulHi > UINT32_MAX || mulRes < mulLo) {
// 96-bit value represented as W[1]:W[0].
uint64_t W[2];
@@ -82,8 +87,7 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
return *this;
}
- Frequency *= n;
- Frequency /= d;
+ Frequency = mulRes / d;
return *this;
}
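The scaling above replaces the old conservative overflow test with an exact one: the 64-bit Frequency is split into 32-bit halves, each half is multiplied by n, and the true product no longer fits in 64 bits exactly when the high partial product exceeds UINT32_MAX or the final addition wraps. A standalone sketch of that test follows; mulOverflows is a made-up name, not part of the patch, and the 128-bit cross-check is only compiled where the compiler provides such a type.

#include <cstdint>
#include <cassert>

// Multiply a 64-bit Frequency by a 32-bit n in 32-bit halves and report
// whether the full product needs more than 64 bits.
static bool mulOverflows(uint64_t Frequency, uint32_t n, uint64_t &Res) {
  uint64_t mulLo = (Frequency & UINT32_MAX) * n;
  uint64_t mulHi = (Frequency >> 32) * n;
  Res = (mulHi << 32) + mulLo;
  return mulHi > UINT32_MAX || Res < mulLo;
}

int main() {
  uint64_t R;
  assert(!mulOverflows(1000, 3, R) && R == 3000);
  assert(mulOverflows(UINT64_MAX, 2, R)); // 2^65 - 2 needs 65 bits
#ifdef __SIZEOF_INT128__
  unsigned __int128 Ref = (unsigned __int128)(UINT64_MAX / 3) * 3;
  assert(!mulOverflows(UINT64_MAX / 3, 3, R) && R == (uint64_t)Ref);
#endif
  return 0;
}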
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 49d04ed83653..e8b83e59802d 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -13,24 +13,17 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-BranchProbability::BranchProbability(uint32_t n, uint32_t d) {
- assert(d > 0 && "Denomiator cannot be 0!");
- assert(n <= d && "Probability cannot be bigger than 1!");
- N = n;
- D = d;
-}
-
void BranchProbability::print(raw_ostream &OS) const {
- OS << N << " / " << D << " = " << ((double)N / D);
+ OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0);
}
void BranchProbability::dump() const {
- print(dbgs());
- dbgs() << "\n";
+ dbgs() << *this << '\n';
}
namespace llvm {
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 63a833c38046..9b3b6c801dd0 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMSupport
ConstantRange.cpp
CrashRecoveryContext.cpp
DataExtractor.cpp
+ DataStream.cpp
Debug.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
@@ -25,10 +26,14 @@ add_llvm_library(LLVMSupport
FoldingSet.cpp
FormattedStream.cpp
GraphWriter.cpp
+ Hashing.cpp
IntEqClasses.cpp
IntervalMap.cpp
+ IntrusiveRefCntPtr.cpp
IsInf.cpp
IsNAN.cpp
+ JSONParser.cpp
+ LockFileManager.cpp
ManagedStatic.cpp
MemoryBuffer.cpp
MemoryObject.cpp
@@ -39,6 +44,7 @@ add_llvm_library(LLVMSupport
SmallVector.cpp
SourceMgr.cpp
Statistic.cpp
+ StreamableMemoryObject.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
@@ -48,6 +54,7 @@ add_llvm_library(LLVMSupport
ToolOutputFile.cpp
Triple.cpp
Twine.cpp
+ YAMLParser.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 238adcce0a12..e6fdf16a82de 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -57,6 +57,9 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+void GenericOptionValue::anchor() {}
+void OptionValue<boolOrDefault>::anchor() {}
+void OptionValue<std::string>::anchor() {}
void Option::anchor() {}
void basic_parser_impl::anchor() {}
void parser<bool>::anchor() {}
@@ -263,8 +266,8 @@ static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
/// and a null value (StringRef()). The latter is accepted for arguments that
/// don't allow a value (-foo); the former is rejected (-foo=).
static inline bool ProvideOption(Option *Handler, StringRef ArgName,
- StringRef Value, int argc, char **argv,
- int &i) {
+ StringRef Value, int argc,
+ const char *const *argv, int &i) {
// Is this a multi-argument option?
unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
@@ -289,12 +292,6 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
break;
case ValueOptional:
break;
-
- default:
- errs() << ProgramName
- << ": Bad ValueMask flag! CommandLine usage error:"
- << Handler->getValueExpectedFlag() << "\n";
- llvm_unreachable(0);
}
// If this isn't a multi-arg option, just run the handler.
@@ -498,10 +495,10 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
/// ExpandResponseFiles - Copy the contents of argv into newArgv,
/// substituting the contents of the response files for the arguments
/// of type @file.
-static void ExpandResponseFiles(unsigned argc, char** argv,
+static void ExpandResponseFiles(unsigned argc, const char*const* argv,
std::vector<char*>& newArgv) {
for (unsigned i = 1; i != argc; ++i) {
- char *arg = argv[i];
+ const char *arg = argv[i];
if (arg[0] == '@') {
sys::PathWithStatus respFile(++arg);
@@ -531,7 +528,7 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
}
}
-void cl::ParseCommandLineOptions(int argc, char **argv,
+void cl::ParseCommandLineOptions(int argc, const char * const *argv,
const char *Overview, bool ReadResponseFiles) {
// Process all registered options.
SmallVector<Option*, 4> PositionalOpts;
@@ -885,7 +882,6 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName,
case OneOrMore:
case ZeroOrMore:
case ConsumeAfter: break;
- default: return error("bad num occurrences flag value!");
}
return handleOccurrence(pos, ArgName, Value);
@@ -1195,7 +1191,7 @@ printOptionNoValue(const Option &O, size_t GlobalWidth) const {
static int OptNameCompare(const void *LHS, const void *RHS) {
typedef std::pair<const char *, Option*> pair_ty;
- return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
}
// Copy Options into a vector so we can sort them as we like.
@@ -1349,7 +1345,7 @@ class VersionPrinter {
public:
void print() {
raw_ostream &OS = outs();
- OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+ OS << "LLVM (http://llvm.org/):\n"
<< " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
#ifdef LLVM_VERSION_INFO
OS << LLVM_VERSION_INFO;
@@ -1369,7 +1365,7 @@ public:
#if (ENABLE_TIMESTAMPS == 1)
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
#endif
- << " Host: " << sys::getHostTriple() << '\n'
+ << " Default target: " << sys::getDefaultTargetTriple() << '\n'
<< " Host CPU: " << CPU << '\n';
}
void operator=(bool OptionWasSpecified) {
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index c29cb53fb9c5..5206cf1f9b8c 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -55,7 +55,7 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
uint32_t W = CR.getBitWidth();
switch (Pred) {
- default: assert(0 && "Invalid ICmp predicate to makeICmpRegion()");
+ default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()");
case CmpInst::ICMP_EQ:
return CR;
case CmpInst::ICMP_NE:
@@ -161,8 +161,7 @@ APInt ConstantRange::getSetSize() const {
APInt ConstantRange::getUnsignedMax() const {
if (isFullSet() || isWrappedSet())
return APInt::getMaxValue(getBitWidth());
- else
- return getUpper() - 1;
+ return getUpper() - 1;
}
/// getUnsignedMin - Return the smallest unsigned value contained in the
@@ -171,8 +170,7 @@ APInt ConstantRange::getUnsignedMax() const {
APInt ConstantRange::getUnsignedMin() const {
if (isFullSet() || (isWrappedSet() && getUpper() != 0))
return APInt::getMinValue(getBitWidth());
- else
- return getLower();
+ return getLower();
}
/// getSignedMax - Return the largest signed value contained in the
@@ -183,14 +181,11 @@ APInt ConstantRange::getSignedMax() const {
if (!isWrappedSet()) {
if (getLower().sle(getUpper() - 1))
return getUpper() - 1;
- else
- return SignedMax;
- } else {
- if (getLower().isNegative() == getUpper().isNegative())
- return SignedMax;
- else
- return getUpper() - 1;
+ return SignedMax;
}
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ return getUpper() - 1;
}
/// getSignedMin - Return the smallest signed value contained in the
@@ -201,18 +196,13 @@ APInt ConstantRange::getSignedMin() const {
if (!isWrappedSet()) {
if (getLower().sle(getUpper() - 1))
return getLower();
- else
+ return SignedMin;
+ }
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
return SignedMin;
- } else {
- if ((getUpper() - 1).slt(getLower())) {
- if (getUpper() != SignedMin)
- return SignedMin;
- else
- return getLower();
- } else {
- return getLower();
- }
}
+ return getLower();
}
/// contains - Return true if the specified value is in the set.
@@ -223,8 +213,7 @@ bool ConstantRange::contains(const APInt &V) const {
if (!isWrappedSet())
return Lower.ule(V) && V.ult(Upper);
- else
- return Lower.ule(V) || V.ult(Upper);
+ return Lower.ule(V) || V.ult(Upper);
}
/// contains - Return true if the argument is a subset of this range.
@@ -284,15 +273,14 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
return ConstantRange(CR.Lower, Upper);
return CR;
- } else {
- if (Upper.ult(CR.Upper))
- return *this;
+ }
+ if (Upper.ult(CR.Upper))
+ return *this;
- if (Lower.ult(CR.Upper))
- return ConstantRange(Lower, CR.Upper);
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
- return ConstantRange(getBitWidth(), false);
- }
+ return ConstantRange(getBitWidth(), false);
}
if (isWrappedSet() && !CR.isWrappedSet()) {
@@ -305,9 +293,9 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
- } else if (CR.Lower.ult(Lower)) {
+ return CR;
+ }
+ if (CR.Lower.ult(Lower)) {
if (CR.Upper.ule(Lower))
return ConstantRange(getBitWidth(), false);
@@ -320,15 +308,15 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (CR.Lower.ult(Upper)) {
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
+ return CR;
}
if (CR.Lower.ult(Lower))
return ConstantRange(Lower, CR.Upper);
return CR;
- } else if (CR.Upper.ult(Lower)) {
+ }
+ if (CR.Upper.ult(Lower)) {
if (CR.Lower.ult(Lower))
return *this;
@@ -336,8 +324,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
}
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
+ return CR;
}
@@ -362,8 +349,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
if (d1.ult(d2))
return ConstantRange(Lower, CR.Upper);
- else
- return ConstantRange(CR.Lower, Upper);
+ return ConstantRange(CR.Lower, Upper);
}
APInt L = Lower, U = Upper;
@@ -396,8 +382,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
if (d1.ult(d2))
return ConstantRange(Lower, CR.Upper);
- else
- return ConstantRange(CR.Lower, Upper);
+ return ConstantRange(CR.Lower, Upper);
}
// ----U L----- : this
@@ -407,13 +392,11 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
// ------U L---- : this
// L-----U : CR
- if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower))
- return ConstantRange(Lower, CR.Upper);
+ assert(CR.Lower.ult(Upper) && CR.Upper.ult(Lower) &&
+ "ConstantRange::unionWith missed a case with one range wrapped");
+ return ConstantRange(Lower, CR.Upper);
}
- assert(isWrappedSet() && CR.isWrappedSet() &&
- "ConstantRange::unionWith missed wrapped union unwrapped case");
-
// ------U L---- and ------U L---- : this
// -U L----------- and ------------U L : CR
if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
@@ -466,10 +449,8 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
/// correspond to the possible range of values as if the source range had been
/// truncated to the specified type.
ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
- unsigned SrcTySize = getBitWidth();
- assert(SrcTySize > DstTySize && "Not a value truncation");
- APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
- if (isFullSet() || getSetSize().ugt(Size))
+ assert(getBitWidth() > DstTySize && "Not a value truncation");
+ if (isFullSet() || getSetSize().getActiveBits() > DstTySize)
return ConstantRange(DstTySize, /*isFullSet=*/true);
return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
@@ -481,10 +462,9 @@ ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
return truncate(DstTySize);
- else if (SrcTySize < DstTySize)
+ if (SrcTySize < DstTySize)
return zeroExtend(DstTySize);
- else
- return *this;
+ return *this;
}
/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
@@ -493,10 +473,9 @@ ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
return truncate(DstTySize);
- else if (SrcTySize < DstTySize)
+ if (SrcTySize < DstTySize)
return signExtend(DstTySize);
- else
- return *this;
+ return *this;
}
ConstantRange
@@ -675,11 +654,10 @@ ConstantRange::lshr(const ConstantRange &Other) const {
}
ConstantRange ConstantRange::inverse() const {
- if (isFullSet()) {
+ if (isFullSet())
return ConstantRange(getBitWidth(), /*isFullSet=*/false);
- } else if (isEmptySet()) {
+ if (isEmptySet())
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- }
return ConstantRange(Upper, Lower);
}
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 263114c06f98..e2af0bc17655 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -165,7 +165,6 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
// Note that we don't actually get here because HandleCrash calls
// longjmp, which means the HandleCrash function never returns.
llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
- return EXCEPTION_CONTINUE_SEARCH;
}
// Because the Enable and Disable calls are static, it means that
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 814566494d30..1e89c6ad2ff2 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -350,6 +350,9 @@ DAGDeltaAlgorithmImpl::Run() {
return Required;
}
+void DAGDeltaAlgorithm::anchor() {
+}
+
DAGDeltaAlgorithm::changeset_ty
DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
const std::vector<edge_ty> &Dependencies) {
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index b946c1df8363..dc21155a0624 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -75,7 +75,7 @@ uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst,
uint32_t count) const {
return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());;
+ Data.data());
}
uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const {
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
new file mode 100644
index 000000000000..94d14a5e36b0
--- /dev/null
+++ b/lib/Support/DataStream.cpp
@@ -0,0 +1,98 @@
+//===--- llvm/Support/DataStream.cpp - Lazy streamed data -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DataStreamer, which fetches bytes of Data from
+// a stream source. It provides support for streaming (lazy reading) of
+// bitcode. An example implementation of streaming from a file or stdin
+// is included.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "Data-stream"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <cerrno>
+#include <cstdio>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+// Interface goals:
+// * StreamableMemoryObject doesn't care about complexities like using
+// threads/async callbacks to actually overlap download+compile
+// * Don't want to duplicate Data in memory
+// * Don't need to know total Data len in advance
+// Non-goals:
+// StreamableMemoryObject already has random access so this interface only does
+// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
+// Data here in addition to MemoryObject). This also means that if we want
+// to be able to free Data, BitstreamBytes/BitcodeReader will implement it
+
+STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
+
+namespace llvm {
+DataStreamer::~DataStreamer() {}
+}
+
+namespace {
+
+// Very simple stream backed by a file. Mostly useful for stdin and debugging;
+// actual file access is probably still best done with mmap.
+class DataFileStreamer : public DataStreamer {
+ int Fd;
+public:
+ DataFileStreamer() : Fd(0) {}
+ virtual ~DataFileStreamer() {
+ close(Fd);
+ }
+ virtual size_t GetBytes(unsigned char *buf, size_t len) {
+ NumStreamFetches++;
+ return read(Fd, buf, len);
+ }
+
+ error_code OpenFile(const std::string &Filename) {
+ if (Filename == "-") {
+ Fd = 0;
+ sys::Program::ChangeStdinToBinary();
+ return error_code::success();
+ }
+
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ Fd = ::open(Filename.c_str(), OpenFlags);
+ if (Fd == -1)
+ return error_code(errno, posix_category());
+ return error_code::success();
+ }
+};
+
+}
+
+namespace llvm {
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+ std::string *StrError) {
+ DataFileStreamer *s = new DataFileStreamer();
+ if (error_code e = s->OpenFile(Filename)) {
+ *StrError = std::string("Could not open ") + Filename + ": " +
+ e.message() + "\n";
+ return NULL;
+ }
+ return s;
+}
+
+}
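A brief usage sketch of the streamer defined above, illustrative only: getDataFileStreamer and DataStreamer::GetBytes are the interface this file adds, while the surrounding main() is just scaffolding.

#include "llvm/Support/DataStream.h"
#include <cstdio>
#include <string>

int main() {
  std::string Err;
  // "-" streams from stdin; any other name is opened read-only.
  llvm::DataStreamer *S = llvm::getDataFileStreamer("-", &Err);
  if (!S) {
    std::fprintf(stderr, "%s", Err.c_str());
    return 1;
  }
  unsigned char Buf[64];
  size_t N = S->GetBytes(Buf, sizeof(Buf)); // in-order, lazy fetch
  std::printf("fetched %lu bytes\n", (unsigned long)N);
  delete S;
  return 0;
}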
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 95a9550f9663..5c59a3ef8ef3 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -95,6 +95,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
return "DW_TAG_GNU_template_parameter_pack";
case DW_TAG_GNU_formal_parameter_pack:
return "DW_TAG_GNU_formal_parameter_pack";
+ case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property";
}
return 0;
}
@@ -245,6 +246,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
+ case DW_AT_APPLE_property: return "DW_AT_APPLE_property";
case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
}
return 0;
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 4c8c0c63ffc4..f9e9cf036608 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -200,7 +200,6 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
// Now its safe to mmap the files into memory because both files
// have a non-zero size.
- error_code ec;
OwningPtr<MemoryBuffer> F1;
if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
if (Error)
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 17b827132f57..c6282c6ab2ab 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -29,24 +30,7 @@ using namespace llvm;
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
/// used to lookup the node in the FoldingSetImpl.
unsigned FoldingSetNodeIDRef::ComputeHash() const {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = static_cast<unsigned>(Size);
- for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
- unsigned Data = *BP;
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
}
bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
@@ -281,15 +265,15 @@ void FoldingSetImpl::GrowHashTable() {
FoldingSetImpl::Node
*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
-
- void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
+ unsigned IDHash = ID.ComputeHash();
+ void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
InsertPos = 0;
FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
- if (NodeEquals(NodeInBucket, ID, TempID))
+ if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
return NodeInBucket;
TempID.clear();
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 0dba28a2530c..32126ec39eba 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,12 +11,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
+static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
+ cl::desc("Execute graph viewer in the background. Creates tmp file litter."));
+
std::string llvm::DOT::EscapeString(const std::string &Label) {
std::string Str(Label);
for (unsigned i = 0; i != Str.length(); ++i)
@@ -49,10 +53,28 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
return Str;
}
-
+// Execute the graph viewer. Return true if successful.
+static bool LLVM_ATTRIBUTE_UNUSED
+ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
+ const sys::Path &Filename, bool wait, std::string &ErrMsg) {
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return false;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+ }
+ else {
+ sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ }
+ return true;
+}
void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
GraphProgram::Name program) {
+ wait &= !ViewBackground;
std::string ErrMsg;
#if HAVE_GRAPHVIZ
sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
@@ -61,14 +83,10 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(Graphviz.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
+
errs() << "Running 'Graphviz' program... ";
- if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif HAVE_XDOT_PY
std::vector<const char*> args;
@@ -83,17 +101,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
default: errs() << "Unknown graph layout name; using default.\n";
}
-
+
args.push_back(0);
errs() << "Running 'xdot.py' program... ";
- if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
- &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
HAVE_TWOPI || HAVE_CIRCO))
@@ -150,14 +163,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back("-o");
args.push_back(PSFilename.c_str());
args.push_back(0);
-
+
errs() << "Running '" << prog.str() << "' program... ";
- if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
return;
- }
- errs() << " done. \n";
sys::Path gv(LLVM_PATH_GV);
args.clear();
@@ -165,19 +175,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(PSFilename.c_str());
args.push_back("--spartan");
args.push_back(0);
-
+
ErrMsg.clear();
- if (wait) {
- if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error: " << ErrMsg << "\n";
- Filename.eraseFromDisk();
- PSFilename.eraseFromDisk();
- }
- else {
- sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
- errs() << "Remember to erase graph files: " << Filename.str() << " "
- << PSFilename.str() << "\n";
- }
+ if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
+ return;
+
#elif HAVE_DOTTY
sys::Path dotty(LLVM_PATH_DOTTY);
@@ -185,16 +187,13 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(dotty.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
- errs() << "Running 'dotty' program... ";
- if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
- } else {
+
// Dotty spawns another app and doesn't wait until it returns
#if defined (__MINGW32__) || defined (_WINDOWS)
- return;
+ wait = false;
#endif
- Filename.eraseFromDisk();
- }
+ errs() << "Running 'dotty' program... ";
+ if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
+ return;
#endif
}
diff --git a/lib/Support/Hashing.cpp b/lib/Support/Hashing.cpp
new file mode 100644
index 000000000000..c69efb7c3cc9
--- /dev/null
+++ b/lib/Support/Hashing.cpp
@@ -0,0 +1,29 @@
+//===-------------- lib/Support/Hashing.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides implementation bits for the LLVM common hashing
+// infrastructure. Documentation and most of the other information is in the
+// header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Hashing.h"
+
+using namespace llvm;
+
+// Provide a definition and static initializer for the fixed seed. This
+// initializer should always be zero to ensure its value can never appear to be
+// non-zero, even during dynamic initialization.
+size_t llvm::hashing::detail::fixed_seed_override = 0;
+
+// Implement the function for forced setting of the fixed seed.
+// FIXME: Use atomic operations here so that there is no data race.
+void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
+ hashing::detail::fixed_seed_override = fixed_value;
+}
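A short usage sketch, illustrative only, of the hook defined above: overriding the seed (here with an arbitrary value, 42) pins hash_code values so hash-dependent behavior can be reproduced from run to run, for example in tests. The override has to happen before the first hash is computed.

#include "llvm/ADT/Hashing.h"
#include <cstdio>

int main() {
  // Override the execution seed before any hashes are computed.
  llvm::set_fixed_execution_hash_seed(42);

  int Data[] = {1, 2, 3, 4};
  llvm::hash_code H = llvm::hash_combine_range(Data, Data + 4);
  llvm::hash_code Combined = llvm::hash_combine(17, H);

  // With the fixed seed these values are stable across runs.
  std::printf("%lx %lx\n", (unsigned long)(size_t)H,
              (unsigned long)(size_t)Combined);
  return 0;
}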
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index a19e4b41189b..0f0696438ca6 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -61,6 +61,8 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
*rECX = registers[2];
*rEDX = registers[3];
return false;
+ #else
+ return true;
#endif
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
@@ -87,9 +89,14 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
mov dword ptr [esi],edx
}
return false;
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+ #else
+ return true;
#endif
-#endif
+#else
return true;
+#endif
}
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
@@ -298,6 +305,10 @@ std::string sys::getHostCPUName() {
}
case 16:
return "amdfam10";
+ case 20:
+ return "btver1";
+ case 21:
+ return "bdver1";
default:
return "generic";
}
diff --git a/lib/Support/IntrusiveRefCntPtr.cpp b/lib/Support/IntrusiveRefCntPtr.cpp
new file mode 100644
index 000000000000..a8b45593ae70
--- /dev/null
+++ b/lib/Support/IntrusiveRefCntPtr.cpp
@@ -0,0 +1,14 @@
+//== IntrusiveRefCntPtr.cpp - Smart Refcounting Pointer ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+
+using namespace llvm;
+
+void RefCountedBaseVPTR::anchor() { }
diff --git a/lib/Support/JSONParser.cpp b/lib/Support/JSONParser.cpp
new file mode 100644
index 000000000000..5dfcf297a7ea
--- /dev/null
+++ b/lib/Support/JSONParser.cpp
@@ -0,0 +1,302 @@
+//===--- JSONParser.cpp - Simple JSON parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a JSON parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/JSONParser.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+JSONParser::JSONParser(StringRef Input, SourceMgr *SM)
+ : SM(SM), Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON");
+ SM->AddNewSourceBuffer(InputBuffer, SMLoc());
+ End = InputBuffer->getBuffer().end();
+ Position = InputBuffer->getBuffer().begin();
+}
+
+JSONValue *JSONParser::parseRoot() {
+ if (Position != InputBuffer->getBuffer().begin())
+ report_fatal_error("Cannot reuse JSONParser.");
+ if (isWhitespace())
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
+ return 0;
+ switch (*Position) {
+ case '[':
+ return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
+ case '{':
+ return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
+ default:
+ setExpectedError("'[' or '{' at start of JSON text", *Position);
+ return 0;
+ }
+}
+
+bool JSONParser::validate() {
+ JSONValue *Root = parseRoot();
+ if (Root == NULL) {
+ return false;
+ }
+ return skip(*Root);
+}
+
+bool JSONParser::skip(const JSONAtom &Atom) {
+ switch(Atom.getKind()) {
+ case JSONAtom::JK_Array:
+ case JSONAtom::JK_Object:
+ return skipContainer(*cast<JSONContainer>(&Atom));
+ case JSONAtom::JK_String:
+ return true;
+ case JSONAtom::JK_KeyValuePair:
+ return skip(*cast<JSONKeyValuePair>(&Atom)->Value);
+ }
+ llvm_unreachable("Impossible enum value.");
+}
+
+// Sets the current error to:
+// "expected <Expected>, but found <Found>".
+void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
+ SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error,
+ "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>());
+ Failed = true;
+}
+
+// Sets the current error to:
+// "expected <Expected>, but found <Found>".
+void JSONParser::setExpectedError(StringRef Expected, char Found) {
+ setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str());
+}
+
+// If there is no character available, returns true and sets the current error
+// to: "expected <Expected>, but found EOF.".
+bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
+ if (Position == End) {
+ setExpectedError(Expected, "EOF");
+ return true;
+ }
+ return false;
+}
+
+// Sets the current error if the current character is not C to:
+// "expected 'C', but got <current character>".
+bool JSONParser::errorIfNotAt(char C, StringRef Message) {
+ if (*Position != C) {
+ std::string Expected =
+ ("'" + StringRef(&C, 1) + "' " + Message).str();
+ if (Position == End)
+ setExpectedError(Expected, "EOF");
+ else
+ setExpectedError(Expected, *Position);
+ return true;
+ }
+ return false;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
+
+// Parses a JSONString, assuming that the current position is on a quote.
+JSONString *JSONParser::parseString() {
+ assert(Position != End);
+ assert(!isWhitespace());
+ if (errorIfNotAt('"', "at start of string"))
+ return 0;
+ StringRef::iterator First = Position + 1;
+
+ // Benchmarking shows that this loop is the hot path of the application with
+ // about 2/3rd of the runtime cycles. Since escaped quotes are not the common
+ // case, and multiple escaped backslashes before escaped quotes are very rare,
+ // we pessimize this case to achieve a smaller inner loop in the common case.
+ // We're doing that by having a quick inner loop that just scans for the next
+ // quote. Once we find the quote we check the last character to see whether
+ // the quote might have been escaped. If the last character is not a '\', we
+ // know the quote was not escaped and have thus found the end of the string.
+ // If the immediately preceding character was a '\', we have to scan backwards
+ // to see whether the previous character was actually an escaped backslash, or
+ // an escape character for the quote. If we find that the current quote was
+ // escaped, we continue parsing for the next quote and repeat.
+ // This optimization brings around 30% performance improvements.
+ do {
+ // Step over the current quote.
+ ++Position;
+ // Find the next quote.
+ while (Position != End && *Position != '"')
+ ++Position;
+ if (errorIfAtEndOfFile("'\"' at end of string"))
+ return 0;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while (*(Position - 1) == '\\' && wasEscaped(First, Position));
+
+ return new (ValueAllocator.Allocate<JSONString>())
+ JSONString(StringRef(First, Position - First));
+}
+
+
+// Advances the position to the next non-whitespace position.
+void JSONParser::nextNonWhitespace() {
+ do {
+ ++Position;
+ } while (isWhitespace());
+}
+
+// Checks if there is a whitespace character at the current position.
+bool JSONParser::isWhitespace() {
+ return *Position == ' ' || *Position == '\t' ||
+ *Position == '\n' || *Position == '\r';
+}
+
+bool JSONParser::failed() const {
+ return Failed;
+}
+
+// Parses a JSONValue, assuming that the current position is at the first
+// character of the value.
+JSONValue *JSONParser::parseValue() {
+ assert(Position != End);
+ assert(!isWhitespace());
+ switch (*Position) {
+ case '[':
+ return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
+ case '{':
+ return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
+ case '"':
+ return parseString();
+ default:
+ setExpectedError("'[', '{' or '\"' at start of value", *Position);
+ return 0;
+ }
+}
+
+// Parses a JSONKeyValuePair, assuming that the current position is at the first
+// character of the key, value pair.
+JSONKeyValuePair *JSONParser::parseKeyValuePair() {
+ assert(Position != End);
+ assert(!isWhitespace());
+
+ JSONString *Key = parseString();
+ if (Key == 0)
+ return 0;
+
+ nextNonWhitespace();
+ if (errorIfNotAt(':', "between key and value"))
+ return 0;
+
+ nextNonWhitespace();
+ const JSONValue *Value = parseValue();
+ if (Value == 0)
+ return 0;
+
+ return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
+ JSONKeyValuePair(Key, Value);
+}
+
+/// \brief Parses the first element of a JSON array or object, or closes the
+/// array.
+///
+/// The method assumes that the current position is before the first character
+/// of the element, with possible white space in between. When successful, it
+/// returns the new position after parsing the element. Otherwise, if there is
+/// no next value, it returns a default constructed StringRef::iterator.
+StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind,
+ char StartChar, char EndChar,
+ const JSONAtom *&Element) {
+ assert(*Position == StartChar);
+ Element = 0;
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("value or end of container at start of container"))
+ return StringRef::iterator();
+
+ if (*Position == EndChar)
+ return StringRef::iterator();
+
+ Element = parseElement(ContainerKind);
+ if (Element == 0)
+ return StringRef::iterator();
+
+ return Position;
+}
+
+/// \brief Parses the next element of a JSON array or object, or closes the
+/// array.
+///
+/// The method assumes that the current position is before the ',' which
+/// separates the next element from the current element. When successful, it
+/// returns the new position after parsing the element. Otherwise, if there is
+/// no next value, it returns a default constructed StringRef::iterator.
+StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind,
+ char EndChar,
+ const JSONAtom *&Element) {
+ Element = 0;
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("',' or end of container for next element"))
+ return 0;
+
+ if (*Position == ',') {
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("element in container"))
+ return StringRef::iterator();
+
+ Element = parseElement(ContainerKind);
+ if (Element == 0)
+ return StringRef::iterator();
+
+ return Position;
+ } else if (*Position == EndChar) {
+ return StringRef::iterator();
+ } else {
+ setExpectedError("',' or end of container for next element", *Position);
+ return StringRef::iterator();
+ }
+}
+
+const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) {
+ switch (ContainerKind) {
+ case JSONAtom::JK_Array:
+ return parseValue();
+ case JSONAtom::JK_Object:
+ return parseKeyValuePair();
+ default:
+ llvm_unreachable("Impossible code path");
+ }
+}
+
+bool JSONParser::skipContainer(const JSONContainer &Container) {
+ for (JSONContainer::AtomIterator I = Container.atom_current(),
+ E = Container.atom_end();
+ I != E; ++I) {
+ assert(*I != 0);
+ if (!skip(**I))
+ return false;
+ }
+ return !failed();
+}
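A quick usage sketch of the validation entry point, illustrative only; note that this parser only understands strings, arrays and objects, so the sample input sticks to those. Diagnostics are emitted through the SourceMgr passed to the constructor.

#include "llvm/Support/JSONParser.h"
#include "llvm/Support/SourceMgr.h"
#include <cstdio>

int main() {
  llvm::SourceMgr SM;
  llvm::JSONParser Parser("[\"hello\", {\"key\": \"value\"}]", &SM);
  // validate() walks the whole document and reports the first error via SM.
  if (!Parser.validate()) {
    std::fprintf(stderr, "invalid JSON\n");
    return 1;
  }
  std::puts("valid JSON");
  return 0;
}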
diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt
new file mode 100644
index 000000000000..5b88be0203e5
--- /dev/null
+++ b/lib/Support/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/Support/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Support
+parent = Libraries
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
new file mode 100644
index 000000000000..64404a1a8e77
--- /dev/null
+++ b/lib/Support/LockFileManager.cpp
@@ -0,0 +1,216 @@
+//===--- LockFileManager.cpp - File-level Locking Utility------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/LockFileManager.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if LLVM_ON_WIN32
+#include <windows.h>
+#endif
+#if LLVM_ON_UNIX
+#include <unistd.h>
+#endif
+using namespace llvm;
+
+/// \brief Attempt to read the lock file with the given name, if it exists.
+///
+/// \param LockFileName The name of the lock file to read.
+///
+/// \returns The process ID of the process that owns this lock file
+Optional<std::pair<std::string, int> >
+LockFileManager::readLockFile(StringRef LockFileName) {
+ // Check whether the lock file exists. If not, clearly there's nothing
+ // to read, so we just return.
+ bool Exists = false;
+ if (sys::fs::exists(LockFileName, Exists) || !Exists)
+ return Optional<std::pair<std::string, int> >();
+
+ // Read the owning host and PID out of the lock file. If it appears that the
+ // owning process is dead, the lock file is invalid.
+ int PID = 0;
+ std::string Hostname;
+ std::ifstream Input(LockFileName.str().c_str());
+ if (Input >> Hostname >> PID && PID > 0 &&
+ processStillExecuting(Hostname, PID))
+ return std::make_pair(Hostname, PID);
+
+ // Delete the lock file. It's invalid anyway.
+ bool Existed;
+ sys::fs::remove(LockFileName, Existed);
+ return Optional<std::pair<std::string, int> >();
+}
+
+bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
+#if LLVM_ON_UNIX
+ char MyHostname[256];
+ MyHostname[255] = 0;
+ MyHostname[0] = 0;
+ gethostname(MyHostname, 255);
+ // Check whether the process is dead. If so, we're done.
+ if (MyHostname == Hostname && getsid(PID) == -1 && errno == ESRCH)
+ return false;
+#endif
+
+ return true;
+}
+
+LockFileManager::LockFileManager(StringRef FileName)
+{
+ LockFileName = FileName;
+ LockFileName += ".lock";
+
+ // If the lock file already exists, don't bother to try to create our own
+ // lock file; it won't work anyway. Just figure out who owns this lock file.
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // Create a lock file that is unique to this instance.
+ UniqueLockFileName = LockFileName;
+ UniqueLockFileName += "-%%%%%%%%";
+ int UniqueLockFileID;
+ if (error_code EC
+ = sys::fs::unique_file(UniqueLockFileName.str(),
+ UniqueLockFileID,
+ UniqueLockFileName,
+ /*makeAbsolute=*/false)) {
+ Error = EC;
+ return;
+ }
+
+ // Write our process ID to our unique lock file.
+ {
+ raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
+
+#if LLVM_ON_UNIX
+ // FIXME: move getpid() call into LLVM
+ char hostname[256];
+ hostname[255] = 0;
+ hostname[0] = 0;
+ gethostname(hostname, 255);
+ Out << hostname << ' ' << getpid();
+#else
+ Out << "localhost 1";
+#endif
+ Out.close();
+
+ if (Out.has_error()) {
+ // We failed to write out PID, so make up an excuse, remove the
+ // unique lock file, and fail.
+ Error = make_error_code(errc::no_space_on_device);
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.c_str(), Existed);
+ return;
+ }
+ }
+
+ // Create a hard link from the lock file name. If this succeeds, we're done.
+ error_code EC
+ = sys::fs::create_hard_link(UniqueLockFileName.str(),
+ LockFileName.str());
+ if (EC == errc::success)
+ return;
+
+ // Creating the hard link failed.
+
+#ifdef LLVM_ON_UNIX
+ // The creation of the hard link may appear to fail, but if stat'ing the
+ // unique file returns a link count of 2, then we can still declare success.
+ struct stat StatBuf;
+ if (stat(UniqueLockFileName.c_str(), &StatBuf) == 0 &&
+ StatBuf.st_nlink == 2)
+ return;
+#endif
+
+ // Someone else managed to create the lock file first. Wipe out our unique
+ // lock file (it's useless now) and read the process ID from the lock file.
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // There is a lock file that nobody owns; try to clean it up and report
+ // an error.
+ sys::fs::remove(LockFileName.str(), Existed);
+ Error = EC;
+}
+
+LockFileManager::LockFileState LockFileManager::getState() const {
+ if (Owner)
+ return LFS_Shared;
+
+ if (Error)
+ return LFS_Error;
+
+ return LFS_Owned;
+}
+
+LockFileManager::~LockFileManager() {
+ if (getState() != LFS_Owned)
+ return;
+
+ // Since we own the lock, remove the lock file and our own unique lock file.
+ bool Existed;
+ sys::fs::remove(LockFileName.str(), Existed);
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+}
+
+void LockFileManager::waitForUnlock() {
+ if (getState() != LFS_Shared)
+ return;
+
+#if LLVM_ON_WIN32
+ unsigned long Interval = 1;
+#else
+ struct timespec Interval;
+ Interval.tv_sec = 0;
+ Interval.tv_nsec = 1000000;
+#endif
+ // Don't wait more than an hour for the file to appear.
+ const unsigned MaxSeconds = 3600;
+ do {
+ // Sleep for the designated interval, to allow the owning process time to
+ // finish up and remove the lock file.
+ // FIXME: Should we hook in to system APIs to get a notification when the
+ // lock file is deleted?
+#if LLVM_ON_WIN32
+ Sleep(Interval);
+#else
+ nanosleep(&Interval, NULL);
+#endif
+ // If the file no longer exists, we're done.
+ bool Exists = false;
+ if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists)
+ return;
+
+ if (!processStillExecuting((*Owner).first, (*Owner).second))
+ return;
+
+ // Exponentially increase the time we wait for the lock to be removed.
+#if LLVM_ON_WIN32
+ Interval *= 2;
+#else
+ Interval.tv_sec *= 2;
+ Interval.tv_nsec *= 2;
+ if (Interval.tv_nsec >= 1000000000) {
+ ++Interval.tv_sec;
+ Interval.tv_nsec -= 1000000000;
+ }
+#endif
+ } while (
+#if LLVM_ON_WIN32
+ Interval < MaxSeconds * 1000
+#else
+ Interval.tv_sec < (time_t)MaxSeconds
+#endif
+ );
+
+ // Give up.
+}
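A usage sketch of the protocol implemented above, illustrative only (the output file name is arbitrary): the constructor either takes ownership of "<name>.lock" or reports the existing owner, waitForUnlock() blocks on a shared lock, and an owned lock is released by the destructor.

#include "llvm/Support/LockFileManager.h"
#include <cstdio>

// Build or reuse "out.bc" under a cooperative file lock.
static void buildWithLock() {
  llvm::LockFileManager Locked("out.bc");
  switch (Locked.getState()) {
  case llvm::LockFileManager::LFS_Error:
    std::fprintf(stderr, "could not acquire or observe the lock\n");
    return;
  case llvm::LockFileManager::LFS_Shared:
    // Another process owns out.bc.lock; block until it finishes.
    Locked.waitForUnlock();
    return;
  case llvm::LockFileManager::LFS_Owned:
    // We own out.bc.lock; produce out.bc here.
    break;
  }
  // The lock file is removed when Locked goes out of scope.
}

int main() { buildWithLock(); return 0; }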
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index c767c15e71c9..098cccb68df5 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -27,8 +27,15 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
if (Ptr == 0) {
void* tmp = Creator ? Creator() : 0;
+ TsanHappensBefore(this);
sys::MemoryFence();
+
+ // This write is racy against the first read in the ManagedStatic
+ // accessors. The race is benign because it does a second read after a
+ // memory fence, at which point it isn't possible to get a partial value.
+ TsanIgnoreWritesBegin();
Ptr = tmp;
+ TsanIgnoreWritesEnd();
DeleterFn = Deleter;
// Add to list of managed statics.
@@ -72,4 +79,3 @@ void llvm::llvm_shutdown() {
if (llvm_is_multithreaded()) llvm_stop_multithreaded();
}
-
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 0771af5fee07..16e5c7a9f72b 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/Path.h"
@@ -29,15 +30,12 @@
#include <sys/stat.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
-#include <sys/uio.h>
#else
#include <io.h>
#endif
#include <fcntl.h>
using namespace llvm;
-namespace { const llvm::error_code success; }
-
//===----------------------------------------------------------------------===//
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//
@@ -306,7 +304,17 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
RealMapOffset)) {
result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
- return success;
+
+ if (RequiresNullTerminator && result->getBufferEnd()[0] != '\0') {
+ // There could be a race that resulted in the file being larger
+ // than the FileSize passed by the caller. We already have an assertion
+ // for this in MemoryBuffer::init(), but to guarantee at runtime that
+ // the buffer is null-terminated here, do a copy that adds a
+ // null terminator.
+ result.reset(MemoryBuffer::getMemBufferCopy(result->getBuffer(),
+ Filename));
+ }
+ return error_code::success();
}
}
@@ -321,29 +329,35 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
char *BufPtr = const_cast<char*>(SB->getBufferStart());
size_t BytesLeft = MapSize;
+#ifndef HAVE_PREAD
if (lseek(FD, Offset, SEEK_SET) == -1)
return error_code(errno, posix_category());
+#endif
while (BytesLeft) {
+#ifdef HAVE_PREAD
+ ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
+#else
ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+#endif
if (NumRead == -1) {
if (errno == EINTR)
continue;
// Error while reading.
return error_code(errno, posix_category());
- } else if (NumRead == 0) {
- // We hit EOF early, truncate and terminate buffer.
- Buf->BufferEnd = BufPtr;
- *BufPtr = 0;
- result.swap(SB);
- return success;
+ }
+ if (NumRead == 0) {
+ assert(0 && "We got inaccurate FileSize value or fstat reported an "
+ "invalid file size.");
+ *BufPtr = '\0'; // null-terminate at the actual size.
+ break;
}
BytesLeft -= NumRead;
BufPtr += NumRead;
}
result.swap(SB);
- return success;
+ return error_code::success();
}
//===----------------------------------------------------------------------===//
@@ -372,5 +386,5 @@ error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
} while (ReadBytes != 0);
result.reset(getMemBufferCopy(Buffer, "<stdin>"));
- return success;
+ return error_code::success();
}
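The read path above now prefers pread over lseek followed by read, so each call carries its own offset instead of mutating the shared file position. A POSIX-only sketch of reading a region at a fixed offset this way, retrying on EINTR and stopping cleanly on a short read; a simplified illustration, not the MemoryBuffer implementation:

#include <cerrno>
#include <unistd.h>

// Read `size` bytes starting at `offset` into `buf`. Returns the number of
// bytes actually read, or -1 on a hard error.
static ssize_t readRegion(int fd, char *buf, size_t size, off_t offset) {
  size_t total = 0;
  while (total < size) {
    ssize_t n = ::pread(fd, buf + total, size - total, offset + total);
    if (n == -1) {
      if (errno == EINTR)
        continue;              // interrupted by a signal, retry
      return -1;               // real error
    }
    if (n == 0)
      break;                   // EOF before `size` bytes: short read
    total += n;
  }
  return static_cast<ssize_t>(total);
}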
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 8874e943f4c2..da5baab4be46 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
@@ -40,109 +40,80 @@ bool MutexImpl::tryacquire() { return true; }
namespace llvm {
using namespace sys;
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_mutex_init does have an address, then mutex support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-// is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_mutex_init is not
-// declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
// Construct a Mutex using pthread calls
MutexImpl::MutexImpl( bool recursive)
: data_(0)
{
- if (pthread_enabled)
- {
- // Declare the pthread_mutex data structures
- pthread_mutex_t* mutex =
- static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
- pthread_mutexattr_t attr;
-
- // Initialize the mutex attributes
- int errorcode = pthread_mutexattr_init(&attr);
- assert(errorcode == 0);
-
- // Initialize the mutex as a recursive mutex, if requested, or normal
- // otherwise.
- int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
- errorcode = pthread_mutexattr_settype(&attr, kind);
- assert(errorcode == 0);
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0); (void)errorcode;
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
- // Make it a process local mutex
- errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
- assert(errorcode == 0);
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
#endif
- // Initialize the mutex
- errorcode = pthread_mutex_init(mutex, &attr);
- assert(errorcode == 0);
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
- // Destroy the attributes
- errorcode = pthread_mutexattr_destroy(&attr);
- assert(errorcode == 0);
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
- // Assign the data member
- data_ = mutex;
- }
+ // Assign the data member
+ data_ = mutex;
}
// Destruct a Mutex
MutexImpl::~MutexImpl()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
- pthread_mutex_destroy(mutex);
- free(mutex);
- }
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
}
bool
MutexImpl::acquire()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_lock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
}
bool
MutexImpl::release()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_unlock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
}
bool
MutexImpl::tryacquire()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_trylock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
}
}
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index e5b7cd3bfbc2..dcddeda977d1 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -38,16 +38,6 @@ bool Path::operator<(const Path& that) const {
return path < that.path;
}
-Path
-Path::GetLLVMConfigDir() {
- Path result;
-#ifdef LLVM_ETCDIR
- if (result.set(LLVM_ETCDIR))
- return result;
-#endif
- return GetLLVMDefaultConfigDir();
-}
-
LLVMFileType
sys::IdentifyFileType(const char *magic, unsigned length) {
assert(magic && "Invalid magic number string");
@@ -100,7 +90,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xCF: {
uint16_t type = 0;
if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
- magic[2] == char(0xFA) &&
+ magic[2] == char(0xFA) &&
(magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
@@ -162,31 +152,31 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Archive_FileType;
+ return type == fs::file_magic::archive;
}
bool
Path::isDynamicLibrary() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
switch (type) {
default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
+ case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+ case fs::file_magic::elf_shared_object:
+ case fs::file_magic::pecoff_executable: return true;
}
}
bool
Path::isObjectFile() const {
- LLVMFileType type;
- if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown)
return false;
return true;
}
@@ -222,10 +212,10 @@ Path::appendSuffix(StringRef suffix) {
bool
Path::isBitcodeFile() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Bitcode_FileType;
+ return type == fs::file_magic::bitcode;
}
bool Path::hasMagicNumber(StringRef Magic) const {
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index bebe442e2478..e2a69a650db8 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/PathV2.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include <cctype>
#include <cstdio>
@@ -23,15 +24,13 @@ namespace {
using llvm::sys::path::is_separator;
#ifdef LLVM_ON_WIN32
- const StringRef separators = "\\/";
- const char prefered_separator = '\\';
+ const char *separators = "\\/";
+ const char prefered_separator = '\\';
#else
- const StringRef separators = "/";
- const char prefered_separator = '/';
+ const char separators = '/';
+ const char prefered_separator = '/';
#endif
- const llvm::error_code success;
-
StringRef find_first_component(StringRef path) {
// Look for this first component in the following order.
// * empty (in this case we return an empty string)
@@ -347,7 +346,7 @@ const StringRef root_directory(StringRef path) {
const StringRef relative_path(StringRef path) {
StringRef root = root_path(path);
- return root.substr(root.size());
+ return path.substr(root.size());
}
void append(SmallVectorImpl<char> &path, const Twine &a,
@@ -492,7 +491,7 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
-
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
@@ -505,7 +504,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
return;
}
-
+
// Fall back to a system default.
const char *DefaultResult;
#ifdef LLVM_ON_WIN32
@@ -519,7 +518,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
#endif
result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
-
+
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -601,12 +600,16 @@ namespace fs {
error_code make_absolute(SmallVectorImpl<char> &path) {
StringRef p(path.data(), path.size());
- bool rootName = path::has_root_name(p),
- rootDirectory = path::has_root_directory(p);
+ bool rootDirectory = path::has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = path::has_root_name(p);
+#else
+ rootName = true;
+#endif
// Already absolute.
if (rootName && rootDirectory)
- return success;
+ return error_code::success();
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
@@ -618,7 +621,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
- return success;
+ return error_code::success();
}
if (!rootName && rootDirectory) {
@@ -627,7 +630,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
- return success;
+ return error_code::success();
}
if (rootName && !rootDirectory) {
@@ -639,7 +642,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
- return success;
+ return error_code::success();
}
llvm_unreachable("All rootName and rootDirectory combinations should have "
@@ -651,12 +654,13 @@ error_code create_directories(const Twine &path, bool &existed) {
StringRef p = path.toStringRef(path_storage);
StringRef parent = path::parent_path(p);
- bool parent_exists;
+ if (!parent.empty()) {
+ bool parent_exists;
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
- if (error_code ec = fs::exists(parent, parent_exists)) return ec;
-
- if (!parent_exists)
- if (error_code ec = create_directories(parent, existed)) return ec;
+ if (!parent_exists)
+ if (error_code ec = create_directories(parent, existed)) return ec;
+ }
return create_directory(p, existed);
}
@@ -678,7 +682,7 @@ error_code is_directory(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_directory(st);
- return success;
+ return error_code::success();
}
bool is_regular_file(file_status status) {
@@ -690,7 +694,7 @@ error_code is_regular_file(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_regular_file(st);
- return success;
+ return error_code::success();
}
bool is_symlink(file_status status) {
@@ -702,7 +706,7 @@ error_code is_symlink(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_symlink(st);
- return success;
+ return error_code::success();
}
bool is_other(file_status status) {
@@ -729,23 +733,134 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
if (ec == errc::value_too_large) {
// Magic.size() > file_size(Path).
result = false;
- return success;
+ return error_code::success();
}
return ec;
}
result = Magic == Buffer;
- return success;
+ return error_code::success();
+}
+
+/// @brief Identify the magic in magic.
+file_magic identify_magic(StringRef magic) {
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return file_magic::bitcode;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return file_magic::bitcode;
+ break;
+ case '!':
+ if (magic.size() >= 8)
+ if (memcmp(magic.data(),"!<arch>\n",8) == 0)
+ return file_magic::archive;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (magic.size() >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return file_magic::elf_relocatable;
+ case 2: return file_magic::elf_executable;
+ case 3: return file_magic::elf_shared_object;
+ case 4: return file_magic::elf_core;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (magic.size() >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return file_magic::macho_object;
+ case 2: return file_magic::macho_executable;
+ case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+ case 4: return file_magic::macho_core;
+ case 5: return file_magic::macho_preload_executabl;
+ case 6: return file_magic::macho_dynamically_linked_shared_lib;
+ case 7: return file_magic::macho_dynamic_linker;
+ case 8: return file_magic::macho_bundle;
+ case 9: return file_magic::macho_dynamic_linker;
+ case 10: return file_magic::macho_dsym_companion;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return file_magic::coff_object;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return file_magic::coff_object;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off =
+ *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
+ return file_magic::pecoff_executable;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return file_magic::coff_object;
+ break;
+
+ default:
+ break;
+ }
+ return file_magic::unknown;
}
-error_code identify_magic(const Twine &path, LLVMFileType &result) {
+error_code identify_magic(const Twine &path, file_magic &result) {
SmallString<32> Magic;
error_code ec = get_magic(path, Magic.capacity(), Magic);
if (ec && ec != errc::value_too_large)
return ec;
- result = IdentifyFileType(Magic.data(), Magic.size());
- return success;
+ result = identify_magic(Magic);
+ return error_code::success();
}
namespace {
@@ -753,7 +868,9 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
if (ft == file_type::directory_file) {
// This code would be a lot better with exceptions ;/.
error_code ec;
- for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ directory_iterator i(path, ec);
+ if (ec) return ec;
+ for (directory_iterator e; i != e; i.increment(ec)) {
if (ec) return ec;
file_status st;
if (error_code ec = i->status(st)) return ec;
@@ -770,7 +887,7 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
++count;
}
- return success;
+ return error_code::success();
}
} // end unnamed namespace
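identify_magic above classifies a buffer purely by its leading bytes. A deliberately tiny standalone version covering just two of the signatures handled above, raw LLVM bitcode ("BC\xC0\xDE") and ELF ("\x7FELF"), shows the shape of the check; the function and enum names are invented for this sketch:

#include <cstring>
#include <string>

enum class Kind { Unknown, Bitcode, ELF };

static Kind classify(const std::string &magic) {
  if (magic.size() >= 4 && std::memcmp(magic.data(), "BC\xC0\xDE", 4) == 0)
    return Kind::Bitcode;          // raw LLVM bitcode
  if (magic.size() >= 4 && std::memcmp(magic.data(), "\x7F" "ELF", 4) == 0)
    return Kind::ELF;              // any ELF object
  return Kind::Unknown;
}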
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 01860b082d62..75bc282d9bd4 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace sys;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index d0b1e10b56fb..6a34f2d08524 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -20,7 +20,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
@@ -42,107 +42,75 @@ bool RWMutexImpl::writer_release() { return true; }
namespace llvm {
using namespace sys;
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_rwlock_init does have an address, then rwlock support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-// is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_rwlock_init is not
-// declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
// Construct a RWMutex using pthread calls
RWMutexImpl::RWMutexImpl()
: data_(0)
{
- if (pthread_enabled)
- {
- // Declare the pthread_rwlock data structures
- pthread_rwlock_t* rwlock =
- static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
#ifdef __APPLE__
- // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
- bzero(rwlock, sizeof(pthread_rwlock_t));
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
#endif
- // Initialize the rwlock
- int errorcode = pthread_rwlock_init(rwlock, NULL);
- (void)errorcode;
- assert(errorcode == 0);
+ // Initialize the rwlock
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
+ assert(errorcode == 0);
- // Assign the data member
- data_ = rwlock;
- }
+ // Assign the data member
+ data_ = rwlock;
}
// Destruct a RWMutex
RWMutexImpl::~RWMutexImpl()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
- pthread_rwlock_destroy(rwlock);
- free(rwlock);
- }
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
}
bool
RWMutexImpl::reader_acquire()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_rdlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::reader_release()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_unlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::writer_acquire()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_wrlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::writer_release()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_unlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
}
}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 997ce0b74cd2..68d9c29411f0 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
#include <cstdlib>
using namespace llvm;
@@ -223,6 +224,56 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
NumTombstones = RHS.NumTombstones;
}
+void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
+ if (this == &RHS) return;
+
+ // We can only avoid copying elements if neither set is small.
+ if (!this->isSmall() && !RHS.isSmall()) {
+ std::swap(this->CurArray, RHS.CurArray);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->NumTombstones, RHS.NumTombstones);
+ return;
+ }
+
+ // FIXME: From here on we assume that both sets have the same small size.
+
+ // If only RHS is small, copy the small elements into LHS and move the pointer
+ // from LHS to RHS.
+ if (!this->isSmall() && RHS.isSmall()) {
+ std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize,
+ this->SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ RHS.CurArray = this->CurArray;
+ RHS.NumTombstones = this->NumTombstones;
+ this->CurArray = this->SmallArray;
+ this->NumTombstones = 0;
+ return;
+ }
+
+ // If only LHS is small, copy the small elements into RHS and move the pointer
+ // from RHS to LHS.
+ if (this->isSmall() && !RHS.isSmall()) {
+ std::copy(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(RHS.NumElements, this->NumElements);
+ std::swap(RHS.CurArraySize, this->CurArraySize);
+ this->CurArray = RHS.CurArray;
+ this->NumTombstones = RHS.NumTombstones;
+ RHS.CurArray = RHS.SmallArray;
+ RHS.NumTombstones = 0;
+ return;
+ }
+
+ // Both are small, just swap the small elements.
+ assert(this->isSmall() && RHS.isSmall());
+ assert(this->CurArraySize == RHS.CurArraySize);
+ std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+}
+
SmallPtrSetImpl::~SmallPtrSetImpl() {
if (!isSmall())
free(CurArray);
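SmallPtrSetImpl::swap above handles three cases: two heap-backed sets swap their pointers, two small sets swap their inline elements, and a mixed pair copies the small side's elements across and hands the heap pointer over. The same structure on a stripped-down small-buffer container; illustrative only, with copying, assignment, and growth omitted:

#include <algorithm>
#include <cstddef>
#include <utility>

// Elements live either in Inline[] or in a heap array pointed to by Data.
struct TinyBuf {
  static const size_t N = 4;
  int Inline[N];
  int *Data = Inline;     // == Inline while the buffer is "small"
  size_t Size = 0;

  bool isSmall() const { return Data == Inline; }

  void swap(TinyBuf &RHS) {
    if (!isSmall() && !RHS.isSmall()) {      // both heap: swap pointers only
      std::swap(Data, RHS.Data);
      std::swap(Size, RHS.Size);
      return;
    }
    if (isSmall() && RHS.isSmall()) {        // both small: swap the elements
      std::swap_ranges(Inline, Inline + N, RHS.Inline);
      std::swap(Size, RHS.Size);
      return;
    }
    // Mixed: copy the small side's elements into the big side's inline
    // storage, then hand the heap pointer over to the formerly-small side.
    TinyBuf *Small = isSmall() ? this : &RHS;
    TinyBuf *Big = isSmall() ? &RHS : this;
    std::copy(Small->Inline, Small->Inline + N, Big->Inline);
    Small->Data = Big->Data;
    Big->Data = Big->Inline;
    std::swap(Size, RHS.Size);
  }
};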
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index de042a9f53c8..bbe36b260b9d 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -140,8 +140,9 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
///
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
-SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
- const char *Type, bool ShowLine) const {
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg,
+ ArrayRef<SMRange> Ranges) const {
// First thing to do: find the current buffer containing the specified
// location.
@@ -156,33 +157,48 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
LineStart[-1] != '\n' && LineStart[-1] != '\r')
--LineStart;
- std::string LineStr;
- if (ShowLine) {
- // Get the end of the line.
- const char *LineEnd = Loc.getPointer();
- while (LineEnd != CurMB->getBufferEnd() &&
- LineEnd[0] != '\n' && LineEnd[0] != '\r')
- ++LineEnd;
- LineStr = std::string(LineStart, LineEnd);
- }
-
- std::string PrintedMsg;
- raw_string_ostream OS(PrintedMsg);
- if (Type)
- OS << Type << ": ";
- OS << Msg;
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ std::string LineStr(LineStart, LineEnd);
+ // Convert any ranges to column ranges that only intersect the line of the
+ // location.
+ SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ SMRange R = Ranges[i];
+ if (!R.isValid()) continue;
+
+ // If the line doesn't contain any part of the range, then ignore it.
+ if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
+ continue;
+
+ // Ignore pieces of the range that go onto other lines.
+ if (R.Start.getPointer() < LineStart)
+ R.Start = SMLoc::getFromPointer(LineStart);
+ if (R.End.getPointer() > LineEnd)
+ R.End = SMLoc::getFromPointer(LineEnd);
+
+ // Translate from SMLoc ranges to column ranges.
+ ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
+ R.End.getPointer()-LineStart));
+ }
+
return SMDiagnostic(*this, Loc,
CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
- Loc.getPointer()-LineStart, OS.str(),
- LineStr, ShowLine);
+ Loc.getPointer()-LineStart, Kind, Msg.str(),
+ LineStr, ColRanges);
}
-void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
- const char *Type, bool ShowLine) const {
+void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg, ArrayRef<SMRange> Ranges) const {
+ SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges);
+
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
- DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
+ DiagHandler(Diagnostic, DiagContext);
return;
}
@@ -192,14 +208,24 @@ void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
assert(CurBuf != -1 && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
- GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
+ Diagnostic.print(0, OS);
}
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
-void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
+SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+ int Line, int Col, SourceMgr::DiagKind Kind,
+ const std::string &Msg,
+ const std::string &LineStr,
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges)
+ : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
+ Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()) {
+}
+
+
+void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const {
if (ProgName && ProgName[0])
S << ProgName << ": ";
@@ -217,16 +243,71 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
S << ": ";
}
+ switch (Kind) {
+ case SourceMgr::DK_Error: S << "error: "; break;
+ case SourceMgr::DK_Warning: S << "warning: "; break;
+ case SourceMgr::DK_Note: S << "note: "; break;
+ }
+
S << Message << '\n';
- if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
- S << LineContents << '\n';
+ if (LineNo == -1 || ColumnNo == -1)
+ return;
- // Print out spaces/tabs before the caret.
- for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
- S << (LineContents[i] == '\t' ? '\t' : ' ');
- S << "^\n";
+ // Build the line with the caret and ranges.
+ std::string CaretLine(LineContents.size()+1, ' ');
+
+ // Expand any ranges.
+ for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
+ std::pair<unsigned, unsigned> R = Ranges[r];
+ for (unsigned i = R.first,
+ e = std::min(R.second, (unsigned)LineContents.size())+1; i != e; ++i)
+ CaretLine[i] = '~';
+ }
+
+ // Finally, plop on the caret.
+ if (unsigned(ColumnNo) <= LineContents.size())
+ CaretLine[ColumnNo] = '^';
+ else
+ CaretLine[LineContents.size()] = '^';
+
+ // ... and remove trailing whitespace so the output doesn't wrap for it. We
+ // know that the line isn't completely empty because it has the caret in it at
+ // least.
+ CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
+
+ // Print out the source line one character at a time, so we can expand tabs.
+ for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
+ if (LineContents[i] != '\t') {
+ S << LineContents[i];
+ ++OutCol;
+ continue;
+ }
+
+ // If we have a tab, emit at least one space, then round up to 8 columns.
+ do {
+ S << ' ';
+ ++OutCol;
+ } while (OutCol & 7);
+ }
+ S << '\n';
+
+ // Print out the caret line, matching tabs in the source line.
+ for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << CaretLine[i];
+ ++OutCol;
+ continue;
+ }
+
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << CaretLine[i];
+ ++OutCol;
+ } while (OutCol & 7);
}
+
+ S << '\n';
}
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 1e733d92e610..d8a6ad35ba9c 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
@@ -72,9 +73,12 @@ void Statistic::RegisterStatistic() {
if (Enabled)
StatInfo->addStatistic(this);
+ TsanHappensBefore(this);
sys::MemoryFence();
// Remember we have been registered.
+ TsanIgnoreWritesBegin();
Initialized = true;
+ TsanIgnoreWritesEnd();
}
}
@@ -126,13 +130,11 @@ void llvm::PrintStatistics(raw_ostream &OS) {
<< "===" << std::string(73, '-') << "===\n\n";
// Print all of the statistics.
- for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
- std::string CountStr = utostr(Stats.Stats[i]->getValue());
- OS << std::string(MaxValLen-CountStr.size(), ' ')
- << CountStr << " " << Stats.Stats[i]->getName()
- << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
- << " - " << Stats.Stats[i]->getDesc() << "\n";
- }
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i)
+ OS << format("%*u %-*s - %s\n",
+ MaxValLen, Stats.Stats[i]->getValue(),
+ MaxNameLen, Stats.Stats[i]->getName(),
+ Stats.Stats[i]->getDesc());
OS << '\n'; // Flush the output stream.
OS.flush();
diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamableMemoryObject.cpp
new file mode 100644
index 000000000000..c23f07b8fc3c
--- /dev/null
+++ b/lib/Support/StreamableMemoryObject.cpp
@@ -0,0 +1,140 @@
+//===- StreamableMemoryObject.cpp - Streamable data interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StreamableMemoryObject.h"
+#include <cassert>
+#include <cstring>
+
+
+using namespace llvm;
+
+namespace {
+
+class RawMemoryObject : public StreamableMemoryObject {
+public:
+ RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
+ FirstChar(Start), LastChar(End) {
+ assert(LastChar > FirstChar && "Invalid start/end range");
+ }
+
+ virtual uint64_t getBase() const { return 0; }
+ virtual uint64_t getExtent() const { return LastChar - FirstChar; }
+ virtual int readByte(uint64_t address, uint8_t* ptr) const;
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const;
+ virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const;
+ virtual bool isValidAddress(uint64_t address) const {
+ return validAddress(address);
+ }
+ virtual bool isObjectEnd(uint64_t address) const {return objectEnd(address);}
+
+private:
+ const uint8_t* const FirstChar;
+ const uint8_t* const LastChar;
+
+ // These are implemented as inline functions here to avoid multiple virtual
+ // calls per public function
+ bool validAddress(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) < LastChar - FirstChar;
+ }
+ bool objectEnd(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) == LastChar - FirstChar;
+ }
+
+ RawMemoryObject(const RawMemoryObject&); // DO NOT IMPLEMENT
+ void operator=(const RawMemoryObject&); // DO NOT IMPLEMENT
+};
+
+int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!validAddress(address)) return -1;
+ *ptr = *((uint8_t *)(uintptr_t)(address + FirstChar));
+ return 0;
+}
+
+int RawMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!validAddress(address) || !validAddress(address + size - 1)) return -1;
+ memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size);
+ if (copied) *copied = size;
+ return size;
+}
+
+const uint8_t *RawMemoryObject::getPointer(uint64_t address,
+ uint64_t size) const {
+ return FirstChar + address;
+}
+} // anonymous namespace
+
+namespace llvm {
+// If the bitcode has a header, then its size is known, and we don't have to
+// block until we actually want to read it.
+bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
+ if (ObjectSize && address < ObjectSize) return true;
+ return fetchToPos(address);
+}
+
+bool StreamingMemoryObject::isObjectEnd(uint64_t address) const {
+ if (ObjectSize) return address == ObjectSize;
+ fetchToPos(address);
+ return address == ObjectSize && address != 0;
+}
+
+uint64_t StreamingMemoryObject::getExtent() const {
+ if (ObjectSize) return ObjectSize;
+ size_t pos = BytesRead + kChunkSize;
+ // keep fetching until we run out of bytes
+ while (fetchToPos(pos)) pos += kChunkSize;
+ return ObjectSize;
+}
+
+int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!fetchToPos(address)) return -1;
+ *ptr = Bytes[address + BytesSkipped];
+ return 0;
+}
+
+int StreamingMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!fetchToPos(address + size - 1)) return -1;
+ memcpy(buf, &Bytes[address + BytesSkipped], size);
+ if (copied) *copied = size;
+ return 0;
+}
+
+bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
+ if (BytesRead < s) return true;
+ BytesSkipped = s;
+ BytesRead -= s;
+ return false;
+}
+
+void StreamingMemoryObject::setKnownObjectSize(size_t size) {
+ ObjectSize = size;
+ Bytes.reserve(size);
+}
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+ const unsigned char *Start, const unsigned char *End) {
+ return new RawMemoryObject(Start, End);
+}
+
+StreamableMemoryObject::~StreamableMemoryObject() { }
+
+StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
+ Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
+ ObjectSize(0), EOFReached(false) {
+ BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
+}
+}
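StreamingMemoryObject grows its byte vector one kChunkSize block at a time as fetchToPos is asked for higher addresses, and records end-of-stream when a fetch comes back short. A simplified sketch of that idea with the streamer reduced to a callback; the names and chunk size are assumptions for illustration:

#include <cstdint>
#include <functional>
#include <vector>

class ChunkedBuffer {
  static const size_t ChunkSize = 4096;
  std::vector<uint8_t> Bytes;
  size_t BytesRead = 0;
  bool EOFReached = false;
  // Returns how many bytes it wrote into the destination (0 at end of stream).
  std::function<size_t(uint8_t *, size_t)> Fetch;

public:
  explicit ChunkedBuffer(std::function<size_t(uint8_t *, size_t)> F)
      : Fetch(std::move(F)) {}

  // Ensure byte `Pos` is available; returns false if the stream ends first.
  bool fetchToPos(size_t Pos) {
    while (Pos >= BytesRead && !EOFReached) {
      Bytes.resize(BytesRead + ChunkSize);
      size_t Got = Fetch(&Bytes[BytesRead], ChunkSize);
      BytesRead += Got;
      if (Got < ChunkSize)
        EOFReached = true;       // short fetch marks end of stream
    }
    return Pos < BytesRead;
  }

  uint8_t operator[](size_t Pos) const { return Bytes[Pos]; }
};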
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 49c5ac4252c8..d77ad7f55a18 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -57,24 +57,3 @@ void llvm::SplitString(StringRef Source,
S = getToken(S.second, Delimiters);
}
}
-
-void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
- StringRef Separators, int MaxSplit,
- bool KeepEmpty) const {
- StringRef rest = *this;
-
- // rest.data() is used to distinguish cases like "a," that splits into
- // "a" + "" and "a" that splits into "a" + 0.
- for (int splits = 0;
- rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
- ++splits) {
- std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
-
- if (p.first.size() != 0 || KeepEmpty)
- A.push_back(p.first);
- rest = p.second;
- }
- // If we have a tail left, add it.
- if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
- A.push_back(rest);
-}
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index a1ac512fa244..c131fe07f48d 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -39,11 +39,13 @@ void StringMapImpl::init(unsigned InitSize) {
NumItems = 0;
NumTombstones = 0;
- TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
-
+ TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
+ sizeof(StringMapEntryBase **) +
+ sizeof(unsigned));
+
// Allocate one extra bucket, set it to look filled so the iterators stop at
// end.
- TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+ TheTable[NumBuckets] = (StringMapEntryBase*)2;
}
@@ -60,29 +62,29 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
}
unsigned FullHashValue = HashString(Name);
unsigned BucketNo = FullHashValue & (HTSize-1);
-
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
unsigned ProbeAmt = 1;
int FirstTombstone = -1;
while (1) {
- ItemBucket &Bucket = TheTable[BucketNo];
- StringMapEntryBase *BucketItem = Bucket.Item;
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return it.
if (BucketItem == 0) {
// If we found a tombstone, we want to reuse the tombstone instead of an
// empty bucket. This reduces probing.
if (FirstTombstone != -1) {
- TheTable[FirstTombstone].FullHashValue = FullHashValue;
+ HashTable[FirstTombstone] = FullHashValue;
return FirstTombstone;
}
- Bucket.FullHashValue = FullHashValue;
+ HashTable[BucketNo] = FullHashValue;
return BucketNo;
}
if (BucketItem == getTombstoneVal()) {
// Skip over tombstones. However, remember the first one we see.
if (FirstTombstone == -1) FirstTombstone = BucketNo;
- } else if (Bucket.FullHashValue == FullHashValue) {
+ } else if (HashTable[BucketNo] == FullHashValue) {
// If the full hash value matches, check deeply for a match. The common
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
@@ -115,18 +117,18 @@ int StringMapImpl::FindKey(StringRef Key) const {
if (HTSize == 0) return -1; // Really empty table?
unsigned FullHashValue = HashString(Key);
unsigned BucketNo = FullHashValue & (HTSize-1);
-
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
unsigned ProbeAmt = 1;
while (1) {
- ItemBucket &Bucket = TheTable[BucketNo];
- StringMapEntryBase *BucketItem = Bucket.Item;
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return.
if (BucketItem == 0)
return -1;
if (BucketItem == getTombstoneVal()) {
// Ignore tombstones.
- } else if (Bucket.FullHashValue == FullHashValue) {
+ } else if (HashTable[BucketNo] == FullHashValue) {
// If the full hash value matches, check deeply for a match. The common
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
@@ -165,8 +167,8 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return 0;
- StringMapEntryBase *Result = TheTable[Bucket].Item;
- TheTable[Bucket].Item = getTombstoneVal();
+ StringMapEntryBase *Result = TheTable[Bucket];
+ TheTable[Bucket] = getTombstoneVal();
--NumItems;
++NumTombstones;
assert(NumItems + NumTombstones <= NumBuckets);
@@ -180,6 +182,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
/// the appropriate mod-of-hashtable-size.
void StringMapImpl::RehashTable() {
unsigned NewSize;
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
@@ -194,19 +197,23 @@ void StringMapImpl::RehashTable() {
// Allocate one extra bucket which will always be non-empty. This allows the
// iterators to stop at end.
- ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
- NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
-
+ StringMapEntryBase **NewTableArray =
+ (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
+ sizeof(unsigned));
+ unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
+ NewTableArray[NewSize] = (StringMapEntryBase*)2;
+
// Rehash all the items into their new buckets. Luckily :) we already have
// the hash values available, so we don't have to rehash any strings.
- for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
- if (IB->Item && IB->Item != getTombstoneVal()) {
+ for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+ StringMapEntryBase *Bucket = TheTable[I];
+ if (Bucket && Bucket != getTombstoneVal()) {
// Fast case, bucket available.
- unsigned FullHash = IB->FullHashValue;
+ unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize-1);
- if (NewTableArray[NewBucket].Item == 0) {
- NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
- NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+ if (NewTableArray[NewBucket] == 0) {
+ NewTableArray[FullHash & (NewSize-1)] = Bucket;
+ NewHashArray[FullHash & (NewSize-1)] = FullHash;
continue;
}
@@ -214,11 +221,11 @@ void StringMapImpl::RehashTable() {
unsigned ProbeSize = 1;
do {
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
- } while (NewTableArray[NewBucket].Item);
+ } while (NewTableArray[NewBucket]);
// Finally found a slot. Fill it in.
- NewTableArray[NewBucket].Item = IB->Item;
- NewTableArray[NewBucket].FullHashValue = FullHash;
+ NewTableArray[NewBucket] = Bucket;
+ NewHashArray[NewBucket] = FullHash;
}
}
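After this change the table is one allocation holding NumBuckets+1 entry pointers followed by a parallel array of cached hash values, located by stepping past the pointer array, as in `(unsigned *)(TheTable + NumBuckets + 1)`. A minimal sketch of that combined layout on a toy structure, not the StringMap types:

#include <cstdlib>

// One calloc carries both arrays: Buckets[0..N] (the extra slot is a
// sentinel) followed by the hash values. Copying is omitted for brevity.
struct Table {
  void **Buckets;       // N+1 pointers, then N+1 unsigned hash slots
  unsigned NumBuckets;

  explicit Table(unsigned N) : NumBuckets(N) {
    Buckets = static_cast<void **>(
        std::calloc(N + 1, sizeof(void *) + sizeof(unsigned)));
    Buckets[N] = reinterpret_cast<void *>(2);   // sentinel marks "end"
  }
  ~Table() { std::free(Buckets); }

  unsigned *hashes() {
    // The hash array starts immediately after the pointer array.
    return reinterpret_cast<unsigned *>(Buckets + NumBuckets + 1);
  }
};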
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index b5b4f9476026..abe570f6df4b 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -10,6 +10,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/edit_distance.h"
#include <bitset>
using namespace llvm;
@@ -25,6 +27,12 @@ static char ascii_tolower(char x) {
return x;
}
+static char ascii_toupper(char x) {
+ if (x >= 'a' && x <= 'z')
+ return x - 'a' + 'A';
+ return x;
+}
+
static bool ascii_isdigit(char x) {
return x >= '0' && x <= '9';
}
@@ -78,56 +86,29 @@ int StringRef::compare_numeric(StringRef RHS) const {
unsigned StringRef::edit_distance(llvm::StringRef Other,
bool AllowReplacements,
unsigned MaxEditDistance) {
- // The algorithm implemented below is the "classic"
- // dynamic-programming algorithm for computing the Levenshtein
- // distance, which is described here:
- //
- // http://en.wikipedia.org/wiki/Levenshtein_distance
- //
- // Although the algorithm is typically described using an m x n
- // array, only two rows are used at a time, so this implemenation
- // just keeps two separate vectors for those two rows.
- size_type m = size();
- size_type n = Other.size();
-
- const unsigned SmallBufferSize = 64;
- unsigned SmallBuffer[SmallBufferSize];
- llvm::OwningArrayPtr<unsigned> Allocated;
- unsigned *previous = SmallBuffer;
- if (2*(n + 1) > SmallBufferSize) {
- previous = new unsigned [2*(n+1)];
- Allocated.reset(previous);
- }
- unsigned *current = previous + (n + 1);
-
- for (unsigned i = 0; i <= n; ++i)
- previous[i] = i;
-
- for (size_type y = 1; y <= m; ++y) {
- current[0] = y;
- unsigned BestThisRow = current[0];
-
- for (size_type x = 1; x <= n; ++x) {
- if (AllowReplacements) {
- current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
- min(current[x-1], previous[x])+1);
- }
- else {
- if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
- else current[x] = min(current[x-1], previous[x]) + 1;
- }
- BestThisRow = min(BestThisRow, current[x]);
- }
+ return llvm::ComputeEditDistance(
+ llvm::ArrayRef<char>(data(), size()),
+ llvm::ArrayRef<char>(Other.data(), Other.size()),
+ AllowReplacements, MaxEditDistance);
+}
- if (MaxEditDistance && BestThisRow > MaxEditDistance)
- return MaxEditDistance + 1;
+//===----------------------------------------------------------------------===//
+// String Operations
+//===----------------------------------------------------------------------===//
- unsigned *tmp = current;
- current = previous;
- previous = tmp;
+std::string StringRef::lower() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_tolower(Data[i]);
}
+ return Result;
+}
- unsigned Result = previous[n];
+std::string StringRef::upper() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_toupper(Data[i]);
+ }
return Result;
}
@@ -144,9 +125,35 @@ size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
- if (substr(i, N).equals(Str))
- return i;
+
+ // For short haystacks or unsupported needles fall back to the naive algorithm
+ if (Length < 16 || N > 255 || N == 0) {
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+ }
+
+ if (From >= Length)
+ return npos;
+
+ // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
+ uint8_t BadCharSkip[256];
+ std::memset(BadCharSkip, N, 256);
+ for (unsigned i = 0; i != N-1; ++i)
+ BadCharSkip[(uint8_t)Str[i]] = N-1-i;
+
+ unsigned Len = Length-From, Pos = From;
+ while (Len >= N) {
+ if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
+ return Pos;
+
+ // Otherwise skip the appropriate number of bytes.
+ uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
+ Len -= Skip;
+ Pos += Skip;
+ }
+
return npos;
}
@@ -223,6 +230,27 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
return npos;
}
+void StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<StringRef, StringRef> p = rest.split(Separators);
+
+ if (KeepEmpty || p.first.size() != 0)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
+
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
@@ -257,8 +285,8 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
/// sequence of radix up to 36 to an unsigned long long value.
-static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
- unsigned long long &Result) {
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
// Autosense radix if not specified.
if (Radix == 0)
Radix = GetAutoSenseRadix(Str);
@@ -298,17 +326,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
- return GetAsUnsignedInteger(*this, Radix, Result);
-}
-
-
-bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+ long long &Result) {
unsigned long long ULLVal;
// Handle positive strings first.
- if (empty() || front() != '-') {
- if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ if (Str.empty() || Str.front() != '-') {
+ if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
// Check for value so large it overflows a signed value.
(long long)ULLVal < 0)
return true;
@@ -317,7 +341,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
}
// Get the positive part of the value.
- if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+ if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
// Reject values so large they'd overflow as negative signed, but allow
// "-0". This negates the unsigned so that the negative isn't undefined
// on signed overflow.
@@ -328,24 +352,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
- long long Val;
- if (getAsInteger(Radix, Val) ||
- (int)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
-bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
- unsigned long long Val;
- if (getAsInteger(Radix, Val) ||
- (unsigned)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
StringRef Str = *this;
@@ -420,3 +426,9 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
return false;
}
+
+
+// Implementation of StringRef hashing.
+hash_code llvm::hash_value(StringRef S) {
+ return hash_combine_range(S.begin(), S.end());
+}
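The new StringRef::find builds a 256-entry bad-character table so a mismatch can skip ahead by up to the needle length, a Boyer-Moore-Horspool-style search. The same idea as a self-contained function over std::string; a hedged sketch, whereas StringRef's version also special-cases short haystacks and a starting offset:

#include <cstdint>
#include <cstring>
#include <string>

// Horspool-style substring search: on a mismatch, skip by the distance from
// the last occurrence of the trailing window character to the end of the
// needle (or the full needle length if it does not occur in the needle).
static size_t findSub(const std::string &Hay, const std::string &Needle) {
  const size_t N = Needle.size();
  if (N == 0)
    return 0;
  if (N > Hay.size())
    return std::string::npos;
  if (N > 255)
    return Hay.find(Needle);   // skip entries are uint8_t, as in the code above

  uint8_t Skip[256];
  std::memset(Skip, static_cast<int>(N), sizeof(Skip));
  for (size_t i = 0; i + 1 < N; ++i)
    Skip[(uint8_t)Needle[i]] = (uint8_t)(N - 1 - i);

  for (size_t Pos = 0; Pos + N <= Hay.size();) {
    if (Hay.compare(Pos, N, Needle) == 0)
      return Pos;
    Pos += Skip[(uint8_t)Hay[Pos + N - 1]];    // jump past impossible positions
  }
  return std::string::npos;
}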
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 7497bfe035c6..53c8d84e7d45 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -84,7 +84,7 @@ void TargetRegistry::RegisterTarget(Target &T,
}
const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
- const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+ const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error);
if (TheTarget && !TheTarget->hasJIT()) {
Error = "No JIT compatible target available for this host";
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index fdb251c0a36b..08b12b658bea 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 8f0bb93eb4d1..7483225fdfb0 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -24,7 +24,7 @@ static bool multithreaded_mode = false;
static sys::Mutex* global_lock = 0;
bool llvm::llvm_start_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
@@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() {
}
void llvm::llvm_stop_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(multithreaded_mode && "Not currently multithreaded!");
// We fence here to insure that all threaded operations are complete BEFORE we
@@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
-#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
+#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
struct ThreadInfo {
@@ -102,7 +102,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
error:
::pthread_attr_destroy(&Attr);
}
-#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
#include "Windows/Windows.h"
#include <process.h>
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index a9ed5eecfa7e..598e8ad6a1a5 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -168,10 +168,8 @@ void Timer::stopTimer() {
static void printVal(double Val, double Total, raw_ostream &OS) {
if (Total < 1e-7) // Avoid dividing by zero.
OS << " ----- ";
- else {
- OS << " " << format("%7.4f", Val) << " (";
- OS << format("%5.1f", Val*100/Total) << "%)";
- }
+ else
+ OS << format(" %7.4f (%5.1f%%)", Val, Val*100/Total);
}
void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
@@ -186,7 +184,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
OS << " ";
if (Total.getMemUsed())
- OS << format("%9lld", (long long)getMemUsed()) << " ";
+ OS << format("%9" PRId64 " ", (int64_t)getMemUsed());
}
@@ -332,11 +330,9 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
// If this is not an collection of ungrouped times, print the total time.
// Ungrouped timers don't really make sense to add up. We still print the
// TOTAL line to make the percentages make sense.
- if (this != DefaultTimerGroup) {
- OS << " Total Execution Time: ";
- OS << format("%5.4f", Total.getProcessTime()) << " seconds (";
- OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
- }
+ if (this != DefaultTimerGroup)
+ OS << format(" Total Execution Time: %5.4f seconds (%5.4f wall clock)\n",
+ Total.getProcessTime(), Total.getWallTime());
OS << '\n';
if (Total.getUserTime())
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c61af372d79c..44a1b38d98d1 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -9,19 +9,19 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
- case InvalidArch: return "<invalid>";
case UnknownArch: return "unknown";
- case alpha: return "alpha";
case arm: return "arm";
- case bfin: return "bfin";
case cellspu: return "cellspu";
+ case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
case mips64: return "mips64";
@@ -29,9 +29,9 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case msp430: return "msp430";
case ppc64: return "powerpc64";
case ppc: return "powerpc";
+ case r600: return "r600";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
- case systemz: return "s390x";
case tce: return "tce";
case thumb: return "thumb";
case x86: return "i386";
@@ -44,7 +44,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case amdil: return "amdil";
}
- return "<invalid>";
+ llvm_unreachable("Invalid ArchType!");
}
const char *Triple::getArchTypePrefix(ArchType Kind) {
@@ -52,13 +52,9 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
default:
return 0;
- case alpha: return "alpha";
-
case arm:
case thumb: return "arm";
- case bfin: return "bfin";
-
case cellspu: return "spu";
case ppc64:
@@ -66,6 +62,10 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case mblaze: return "mblaze";
+ case hexagon: return "hexagon";
+
+ case r600: return "r600";
+
case sparcv9:
case sparc: return "sparc";
@@ -88,9 +88,11 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case Apple: return "apple";
case PC: return "pc";
case SCEI: return "scei";
+ case BGP: return "bgp";
+ case BGQ: return "bgq";
}
- return "<invalid>";
+ llvm_unreachable("Invalid VendorType!");
}
const char *Triple::getOSTypeName(OSType Kind) {
@@ -110,83 +112,59 @@ const char *Triple::getOSTypeName(OSType Kind) {
case MinGW32: return "mingw32";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
- case Psp: return "psp";
case Solaris: return "solaris";
case Win32: return "win32";
case Haiku: return "haiku";
case Minix: return "minix";
case RTEMS: return "rtems";
case NativeClient: return "nacl";
+ case CNK: return "cnk";
}
- return "<invalid>";
+ llvm_unreachable("Invalid OSType");
}
const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
+ case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
case EABI: return "eabi";
case MachO: return "macho";
+ case ANDROIDEABI: return "androideabi";
}
- return "<invalid>";
+ llvm_unreachable("Invalid EnvironmentType!");
}
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
- if (Name == "alpha")
- return alpha;
- if (Name == "arm")
- return arm;
- if (Name == "bfin")
- return bfin;
- if (Name == "cellspu")
- return cellspu;
- if (Name == "mips")
- return mips;
- if (Name == "mipsel")
- return mipsel;
- if (Name == "mips64")
- return mips64;
- if (Name == "mips64el")
- return mips64el;
- if (Name == "msp430")
- return msp430;
- if (Name == "ppc64")
- return ppc64;
- if (Name == "ppc32")
- return ppc;
- if (Name == "ppc")
- return ppc;
- if (Name == "mblaze")
- return mblaze;
- if (Name == "sparc")
- return sparc;
- if (Name == "sparcv9")
- return sparcv9;
- if (Name == "systemz")
- return systemz;
- if (Name == "tce")
- return tce;
- if (Name == "thumb")
- return thumb;
- if (Name == "x86")
- return x86;
- if (Name == "x86-64")
- return x86_64;
- if (Name == "xcore")
- return xcore;
- if (Name == "ptx32")
- return ptx32;
- if (Name == "ptx64")
- return ptx64;
- if (Name == "le32")
- return le32;
- if (Name == "amdil")
- return amdil;
-
- return UnknownArch;
+ return StringSwitch<Triple::ArchType>(Name)
+ .Case("arm", arm)
+ .Case("cellspu", cellspu)
+ .Case("mips", mips)
+ .Case("mipsel", mipsel)
+ .Case("mips64", mips64)
+ .Case("mips64el", mips64el)
+ .Case("msp430", msp430)
+ .Case("ppc64", ppc64)
+ .Case("ppc32", ppc)
+ .Case("ppc", ppc)
+ .Case("mblaze", mblaze)
+ .Case("r600", r600)
+ .Case("hexagon", hexagon)
+ .Case("sparc", sparc)
+ .Case("sparcv9", sparcv9)
+ .Case("tce", tce)
+ .Case("thumb", thumb)
+ .Case("x86", x86)
+ .Case("x86-64", x86_64)
+ .Case("xcore", xcore)
+ .Case("ptx32", ptx32)
+ .Case("ptx64", ptx64)
+ .Case("le32", le32)
+ .Case("amdil", amdil)
+ .Default(UnknownArch);
}
Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
@@ -202,36 +180,22 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// This code must be kept in sync with Clang's Darwin specific argument
// translation.
- if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" ||
- Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" ||
- Str == "ppc7450" || Str == "ppc970")
- return Triple::ppc;
-
- if (Str == "ppc64")
- return Triple::ppc64;
-
- if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" ||
- Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" ||
- Str == "pentIIm5" || Str == "pentium4")
- return Triple::x86;
-
- if (Str == "x86_64")
- return Triple::x86_64;
-
- // This is derived from the driver driver.
- if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
- Str == "armv6" || Str == "armv7" || Str == "armv7f" || Str == "armv7k" ||
- Str == "armv7s")
- return Triple::arm;
-
- if (Str == "ptx32")
- return Triple::ptx32;
- if (Str == "ptx64")
- return Triple::ptx64;
- if (Str == "amdil")
- return Triple::amdil;
-
- return Triple::UnknownArch;
+ return StringSwitch<ArchType>(Str)
+ .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", Triple::ppc)
+ .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", Triple::ppc)
+ .Case("ppc64", Triple::ppc64)
+ .Cases("i386", "i486", "i486SX", "i586", "i686", Triple::x86)
+ .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
+ Triple::x86)
+ .Case("x86_64", Triple::x86_64)
+ // This is derived from the driver driver.
+ .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
+ .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
+ .Case("r600", Triple::r600)
+ .Case("ptx32", Triple::ptx32)
+ .Case("ptx64", Triple::ptx64)
+ .Case("amdil", Triple::amdil)
+ .Default(Triple::UnknownArch);
}
// Returns architecture name that is understood by the target assembler.
@@ -239,188 +203,150 @@ const char *Triple::getArchNameForAssembler() {
if (!isOSDarwin() && getVendor() != Triple::Apple)
return NULL;
- StringRef Str = getArchName();
- if (Str == "i386")
- return "i386";
- if (Str == "x86_64")
- return "x86_64";
- if (Str == "powerpc")
- return "ppc";
- if (Str == "powerpc64")
- return "ppc64";
- if (Str == "mblaze" || Str == "microblaze")
- return "mblaze";
- if (Str == "arm")
- return "arm";
- if (Str == "armv4t" || Str == "thumbv4t")
- return "armv4t";
- if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
- || Str == "thumbv5e")
- return "armv5";
- if (Str == "armv6" || Str == "thumbv6")
- return "armv6";
- if (Str == "armv7" || Str == "thumbv7")
- return "armv7";
- if (Str == "ptx32")
- return "ptx32";
- if (Str == "ptx64")
- return "ptx64";
- if (Str == "le32")
- return "le32";
- if (Str == "amdil")
- return "amdil";
- return NULL;
+ return StringSwitch<const char*>(getArchName())
+ .Case("i386", "i386")
+ .Case("x86_64", "x86_64")
+ .Case("powerpc", "ppc")
+ .Case("powerpc64", "ppc64")
+ .Cases("mblaze", "microblaze", "mblaze")
+ .Case("arm", "arm")
+ .Cases("armv4t", "thumbv4t", "armv4t")
+ .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
+ .Cases("armv6", "thumbv6", "armv6")
+ .Cases("armv7", "thumbv7", "armv7")
+ .Case("r600", "r600")
+ .Case("ptx32", "ptx32")
+ .Case("ptx64", "ptx64")
+ .Case("le32", "le32")
+ .Case("amdil", "amdil")
+ .Default(NULL);
}
-//
+static Triple::ArchType parseArch(StringRef ArchName) {
+ return StringSwitch<Triple::ArchType>(ArchName)
+ .Cases("i386", "i486", "i586", "i686", Triple::x86)
+ // FIXME: Do we need to support these?
+ .Cases("i786", "i886", "i986", Triple::x86)
+ .Cases("amd64", "x86_64", Triple::x86_64)
+ .Case("powerpc", Triple::ppc)
+ .Cases("powerpc64", "ppu", Triple::ppc64)
+ .Case("mblaze", Triple::mblaze)
+ .Cases("arm", "xscale", Triple::arm)
+ // FIXME: It would be good to replace these with explicit names for all the
+ // various suffixes supported.
+ .StartsWith("armv", Triple::arm)
+ .Case("thumb", Triple::thumb)
+ .StartsWith("thumbv", Triple::thumb)
+ .Cases("spu", "cellspu", Triple::cellspu)
+ .Case("msp430", Triple::msp430)
+ .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
+ .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
+ .Cases("mips64", "mips64eb", Triple::mips64)
+ .Case("mips64el", Triple::mips64el)
+ .Case("r600", Triple::r600)
+ .Case("hexagon", Triple::hexagon)
+ .Case("sparc", Triple::sparc)
+ .Case("sparcv9", Triple::sparcv9)
+ .Case("tce", Triple::tce)
+ .Case("xcore", Triple::xcore)
+ .Case("ptx32", Triple::ptx32)
+ .Case("ptx64", Triple::ptx64)
+ .Case("le32", Triple::le32)
+ .Case("amdil", Triple::amdil)
+ .Default(Triple::UnknownArch);
+}
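The StringSwitch table above replaces the old if/else chain and is the idiom used by every parse* helper in this patch. A minimal, self-contained sketch of the same pattern follows; the enum and the handful of mappings are illustrative only, not the full architecture table.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    #include <cassert>

    // Sketch: classify an architecture name the way parseArch() does, matching
    // exact names with Case()/Cases() and prefixes with StartsWith().
    enum SketchArch { SA_X86, SA_ARM, SA_Unknown };

    static SketchArch classify(llvm::StringRef Name) {
      return llvm::StringSwitch<SketchArch>(Name)
          .Cases("i386", "i486", "i586", "i686", SA_X86)
          .Case("arm", SA_ARM)
          .StartsWith("armv", SA_ARM)   // armv4t, armv7, ...
          .Default(SA_Unknown);
    }

    int main() {
      assert(classify("i686") == SA_X86);
      assert(classify("armv7") == SA_ARM);
      assert(classify("sh4") == SA_Unknown);
      return 0;
    }

The first matching Case/StartsWith wins, so more specific entries (for example "gnueabihf" before "gnueabi" in parseEnvironment below) must come first.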
-Triple::ArchType Triple::ParseArch(StringRef ArchName) {
- if (ArchName.size() == 4 && ArchName[0] == 'i' &&
- ArchName[2] == '8' && ArchName[3] == '6' &&
- ArchName[1] - '3' < 6) // i[3-9]86
- return x86;
- else if (ArchName == "amd64" || ArchName == "x86_64")
- return x86_64;
- else if (ArchName == "bfin")
- return bfin;
- else if (ArchName == "powerpc")
- return ppc;
- else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
- return ppc64;
- else if (ArchName == "mblaze")
- return mblaze;
- else if (ArchName == "arm" ||
- ArchName.startswith("armv") ||
- ArchName == "xscale")
- return arm;
- else if (ArchName == "thumb" ||
- ArchName.startswith("thumbv"))
- return thumb;
- else if (ArchName.startswith("alpha"))
- return alpha;
- else if (ArchName == "spu" || ArchName == "cellspu")
- return cellspu;
- else if (ArchName == "msp430")
- return msp430;
- else if (ArchName == "mips" || ArchName == "mipseb" ||
- ArchName == "mipsallegrex")
- return mips;
- else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
- ArchName == "psp")
- return mipsel;
- else if (ArchName == "mips64" || ArchName == "mips64eb")
- return mips64;
- else if (ArchName == "mips64el")
- return mips64el;
- else if (ArchName == "sparc")
- return sparc;
- else if (ArchName == "sparcv9")
- return sparcv9;
- else if (ArchName == "s390x")
- return systemz;
- else if (ArchName == "tce")
- return tce;
- else if (ArchName == "xcore")
- return xcore;
- else if (ArchName == "ptx32")
- return ptx32;
- else if (ArchName == "ptx64")
- return ptx64;
- else if (ArchName == "le32")
- return le32;
- else if (ArchName == "amdil")
- return amdil;
- else
- return UnknownArch;
+static Triple::VendorType parseVendor(StringRef VendorName) {
+ return StringSwitch<Triple::VendorType>(VendorName)
+ .Case("apple", Triple::Apple)
+ .Case("pc", Triple::PC)
+ .Case("scei", Triple::SCEI)
+ .Case("bgp", Triple::BGP)
+ .Case("bgq", Triple::BGQ)
+ .Default(Triple::UnknownVendor);
}
-Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
- if (VendorName == "apple")
- return Apple;
- else if (VendorName == "pc")
- return PC;
- else if (VendorName == "scei")
- return SCEI;
- else
- return UnknownVendor;
-}
-
-Triple::OSType Triple::ParseOS(StringRef OSName) {
- if (OSName.startswith("auroraux"))
- return AuroraUX;
- else if (OSName.startswith("cygwin"))
- return Cygwin;
- else if (OSName.startswith("darwin"))
- return Darwin;
- else if (OSName.startswith("dragonfly"))
- return DragonFly;
- else if (OSName.startswith("freebsd"))
- return FreeBSD;
- else if (OSName.startswith("ios"))
- return IOS;
- else if (OSName.startswith("kfreebsd"))
- return KFreeBSD;
- else if (OSName.startswith("linux"))
- return Linux;
- else if (OSName.startswith("lv2"))
- return Lv2;
- else if (OSName.startswith("macosx"))
- return MacOSX;
- else if (OSName.startswith("mingw32"))
- return MinGW32;
- else if (OSName.startswith("netbsd"))
- return NetBSD;
- else if (OSName.startswith("openbsd"))
- return OpenBSD;
- else if (OSName.startswith("psp"))
- return Psp;
- else if (OSName.startswith("solaris"))
- return Solaris;
- else if (OSName.startswith("win32"))
- return Win32;
- else if (OSName.startswith("haiku"))
- return Haiku;
- else if (OSName.startswith("minix"))
- return Minix;
- else if (OSName.startswith("rtems"))
- return RTEMS;
- else if (OSName.startswith("nacl"))
- return NativeClient;
- else
- return UnknownOS;
-}
-
-Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
- if (EnvironmentName.startswith("eabi"))
- return EABI;
- else if (EnvironmentName.startswith("gnueabi"))
- return GNUEABI;
- else if (EnvironmentName.startswith("gnu"))
- return GNU;
- else if (EnvironmentName.startswith("macho"))
- return MachO;
- else
- return UnknownEnvironment;
+static Triple::OSType parseOS(StringRef OSName) {
+ return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("auroraux", Triple::AuroraUX)
+ .StartsWith("cygwin", Triple::Cygwin)
+ .StartsWith("darwin", Triple::Darwin)
+ .StartsWith("dragonfly", Triple::DragonFly)
+ .StartsWith("freebsd", Triple::FreeBSD)
+ .StartsWith("ios", Triple::IOS)
+ .StartsWith("kfreebsd", Triple::KFreeBSD)
+ .StartsWith("linux", Triple::Linux)
+ .StartsWith("lv2", Triple::Lv2)
+ .StartsWith("macosx", Triple::MacOSX)
+ .StartsWith("mingw32", Triple::MinGW32)
+ .StartsWith("netbsd", Triple::NetBSD)
+ .StartsWith("openbsd", Triple::OpenBSD)
+ .StartsWith("solaris", Triple::Solaris)
+ .StartsWith("win32", Triple::Win32)
+ .StartsWith("haiku", Triple::Haiku)
+ .StartsWith("minix", Triple::Minix)
+ .StartsWith("rtems", Triple::RTEMS)
+ .StartsWith("nacl", Triple::NativeClient)
+ .StartsWith("cnk", Triple::CNK)
+ .Default(Triple::UnknownOS);
}
-void Triple::Parse() const {
- assert(!isInitialized() && "Invalid parse call.");
+static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
+ return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
+ .StartsWith("eabi", Triple::EABI)
+ .StartsWith("gnueabihf", Triple::GNUEABIHF)
+ .StartsWith("gnueabi", Triple::GNUEABI)
+ .StartsWith("gnu", Triple::GNU)
+ .StartsWith("macho", Triple::MachO)
+ .StartsWith("androideabi", Triple::ANDROIDEABI)
+ .Default(Triple::UnknownEnvironment);
+}
- Arch = ParseArch(getArchName());
- Vendor = ParseVendor(getVendorName());
- OS = ParseOS(getOSName());
- Environment = ParseEnvironment(getEnvironmentName());
+/// \brief Construct a triple from the string representation provided.
+///
+/// This stores the string representation and parses the various pieces into
+/// enum members.
+Triple::Triple(const Twine &Str)
+ : Data(Str.str()),
+ Arch(parseArch(getArchName())),
+ Vendor(parseVendor(getVendorName())),
+ OS(parseOS(getOSName())),
+ Environment(parseEnvironment(getEnvironmentName())) {
+}
- assert(isInitialized() && "Failed to initialize!");
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, and OS.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members. It leaves the environment unknown and omits it from
+/// the string representation.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment() {
+}
+
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, OS, and environment.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
+ const Twine &EnvironmentStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') +
+ EnvironmentStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment(parseEnvironment(EnvironmentStr.str())) {
}
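With parsing moved into the constructors, a Triple's components are available immediately after construction and no lazy Parse() call remains. A small usage sketch, using an arbitrary example triple and only enumerators visible in this patch:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      // Parsed eagerly by the constructor added above.
      llvm::Triple T("x86_64-pc-linux-gnu");
      assert(T.getArch() == llvm::Triple::x86_64);
      assert(T.getVendor() == llvm::Triple::PC);
      assert(T.getOS() == llvm::Triple::Linux);
      assert(T.getEnvironment() == llvm::Triple::GNU);

      // The three-argument form leaves the environment unknown.
      llvm::Triple T2("arm", "apple", "darwin");
      assert(T2.getEnvironment() == llvm::Triple::UnknownEnvironment);
      return 0;
    }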
std::string Triple::normalize(StringRef Str) {
// Parse into components.
SmallVector<StringRef, 4> Components;
- for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
- Last = Str.find('-', First);
- Components.push_back(Str.slice(First, Last));
- }
+ Str.split(Components, "-");
// If the first component corresponds to a known architecture, preferentially
// use it for the architecture. If the second component corresponds to a
@@ -429,16 +355,16 @@ std::string Triple::normalize(StringRef Str) {
// valid os.
ArchType Arch = UnknownArch;
if (Components.size() > 0)
- Arch = ParseArch(Components[0]);
+ Arch = parseArch(Components[0]);
VendorType Vendor = UnknownVendor;
if (Components.size() > 1)
- Vendor = ParseVendor(Components[1]);
+ Vendor = parseVendor(Components[1]);
OSType OS = UnknownOS;
if (Components.size() > 2)
- OS = ParseOS(Components[2]);
+ OS = parseOS(Components[2]);
EnvironmentType Environment = UnknownEnvironment;
if (Components.size() > 3)
- Environment = ParseEnvironment(Components[3]);
+ Environment = parseEnvironment(Components[3]);
// Note which components are already in their final position. These will not
// be moved.
@@ -464,22 +390,21 @@ std::string Triple::normalize(StringRef Str) {
bool Valid = false;
StringRef Comp = Components[Idx];
switch (Pos) {
- default:
- assert(false && "unexpected component type!");
+ default: llvm_unreachable("unexpected component type!");
case 0:
- Arch = ParseArch(Comp);
+ Arch = parseArch(Comp);
Valid = Arch != UnknownArch;
break;
case 1:
- Vendor = ParseVendor(Comp);
+ Vendor = parseVendor(Comp);
Valid = Vendor != UnknownVendor;
break;
case 2:
- OS = ParseOS(Comp);
+ OS = parseOS(Comp);
Valid = OS != UnknownOS;
break;
case 3:
- Environment = ParseEnvironment(Comp);
+ Environment = parseEnvironment(Comp);
Valid = Environment != UnknownEnvironment;
break;
}
@@ -500,7 +425,8 @@ std::string Triple::normalize(StringRef Str) {
// components to the right.
for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
// Skip over any fixed components.
- while (i < array_lengthof(Found) && Found[i]) ++i;
+ while (i < array_lengthof(Found) && Found[i])
+ ++i;
// Place the component at the new position, getting the component
// that was at this position - it will be moved right.
std::swap(CurrentComponent, Components[i]);
@@ -528,7 +454,8 @@ std::string Triple::normalize(StringRef Str) {
Components.push_back(CurrentComponent);
// Advance Idx to the component's new position.
- while (++Idx < array_lengthof(Found) && Found[Idx]) {}
+ while (++Idx < array_lengthof(Found) && Found[Idx])
+ ;
} while (Idx < Pos); // Add more until the final position is reached.
}
assert(Pos < Components.size() && Components[Pos] == Comp &&
@@ -618,9 +545,47 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
}
}
+bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ getOSVersion(Major, Minor, Micro);
+
+ switch (getOS()) {
+ default: llvm_unreachable("unexpected OS for Darwin triple");
+ case Darwin:
+ // Default to darwin8, i.e., MacOSX 10.4.
+ if (Major == 0)
+ Major = 8;
+ // Darwin version numbers are skewed from OS X versions.
+ if (Major < 4)
+ return false;
+ Micro = 0;
+ Minor = Major - 4;
+ Major = 10;
+ break;
+ case MacOSX:
+ // Default to 10.4.
+ if (Major == 0) {
+ Major = 10;
+ Minor = 4;
+ }
+ if (Major != 10)
+ return false;
+ break;
+ case IOS:
+ // Ignore the version from the triple. This is only handled because the
+ // clang driver combines OS X and IOS support into a common Darwin
+ // toolchain that wants to know the OS X version number even when targeting
+ // IOS.
+ Major = 10;
+ Minor = 4;
+ Micro = 0;
+ break;
+ }
+ return true;
+}
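A quick sanity check of the Darwin-to-OS X mapping implemented above: darwinN corresponds to Mac OS X 10.(N-4), so darwin10 reports 10.6. The values below follow directly from the code; the triple string is an arbitrary example.

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      llvm::Triple T("x86_64-apple-darwin10");
      unsigned Major = 0, Minor = 0, Micro = 0;
      bool OK = T.getMacOSXVersion(Major, Minor, Micro);
      // darwin10 -> Mac OS X 10.6.0 per the version skew handled above.
      assert(OK && Major == 10 && Minor == 6 && Micro == 0);
      return 0;
    }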
+
void Triple::setTriple(const Twine &Str) {
- Data = Str.str();
- Arch = InvalidArch;
+ *this = Triple(Str);
}
void Triple::setArch(ArchType Kind) {
@@ -670,3 +635,126 @@ void Triple::setEnvironmentName(StringRef Str) {
void Triple::setOSAndEnvironmentName(StringRef Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
+
+static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
+ switch (Arch) {
+ case llvm::Triple::UnknownArch:
+ return 0;
+
+ case llvm::Triple::msp430:
+ return 16;
+
+ case llvm::Triple::amdil:
+ case llvm::Triple::arm:
+ case llvm::Triple::cellspu:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::le32:
+ case llvm::Triple::mblaze:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::ppc:
+ case llvm::Triple::ptx32:
+ case llvm::Triple::r600:
+ case llvm::Triple::sparc:
+ case llvm::Triple::tce:
+ case llvm::Triple::thumb:
+ case llvm::Triple::x86:
+ case llvm::Triple::xcore:
+ return 32;
+
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ptx64:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::x86_64:
+ return 64;
+ }
+ llvm_unreachable("Invalid architecture value");
+}
+
+bool Triple::isArch64Bit() const {
+ return getArchPointerBitWidth(getArch()) == 64;
+}
+
+bool Triple::isArch32Bit() const {
+ return getArchPointerBitWidth(getArch()) == 32;
+}
+
+bool Triple::isArch16Bit() const {
+ return getArchPointerBitWidth(getArch()) == 16;
+}
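The pointer-width queries above let callers test target width directly instead of comparing architecture names. A brief usage sketch (the triples are arbitrary examples; get64BitArchVariant is defined just below):

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      assert(llvm::Triple("x86_64-apple-darwin").isArch64Bit());
      assert(llvm::Triple("i686-pc-linux-gnu").isArch32Bit());
      assert(llvm::Triple("msp430-generic-generic").isArch16Bit());

      // get64BitArchVariant() maps e.g. x86 -> x86_64 (see below).
      llvm::Triple T = llvm::Triple("i686-pc-linux-gnu").get64BitArchVariant();
      assert(T.getArch() == llvm::Triple::x86_64);
      return 0;
    }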
+
+Triple Triple::get32BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::msp430:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::cellspu:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::ppc:
+ case Triple::ptx32:
+ case Triple::r600:
+ case Triple::sparc:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::x86:
+ case Triple::xcore:
+ // Already 32-bit.
+ break;
+
+ case Triple::mips64: T.setArch(Triple::mips); break;
+ case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::ptx64: T.setArch(Triple::ptx32); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
+ }
+ return T;
+}
+
+Triple Triple::get64BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::cellspu:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::msp430:
+ case Triple::r600:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::xcore:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ case Triple::ppc64:
+ case Triple::ptx64:
+ case Triple::sparcv9:
+ case Triple::x86_64:
+ // Already 64-bit.
+ break;
+
+ case Triple::mips: T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::ptx32: T.setArch(Triple::ptx64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
+ }
+ return T;
+}
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index dda3ce2c6f97..726e2fbcf056 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -35,14 +35,11 @@ static std::string getOSVersion() {
return info.release;
}
-std::string sys::getHostTriple() {
- // FIXME: Derive directly instead of relying on the autoconf generated
- // variable.
+std::string sys::getDefaultTargetTriple() {
+ StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = TargetTripleString.split('-');
- StringRef HostTripleString(LLVM_HOSTTRIPLE);
- std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
-
- // Normalize the arch, since the host triple may not actually match the host.
+ // Normalize the arch, since the target triple may not actually match the target.
std::string Arch = ArchSplit.first;
std::string Triple(Arch);
@@ -55,7 +52,7 @@ std::string sys::getHostTriple() {
Triple[1] = '3';
// On darwin, we want to update the version to match that of the
- // host.
+ // target.
std::string::size_type DarwinDashIdx = Triple.find("-darwin");
if (DarwinDashIdx != std::string::npos) {
Triple.resize(DarwinDashIdx + strlen("-darwin"));
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 85c7c4022f48..ddc1e0f9cec8 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -60,6 +60,11 @@
#include <mach-o/dyld.h>
#endif
+// For GNU Hurd
+#if defined(__GNU__) && !defined(MAXPATHLEN)
+# define MAXPATHLEN 4096
+#endif
+
// Put in a hack for Cygwin which falsely reports that the mkdtemp function
// is available when it is not.
#ifdef __CYGWIN__
@@ -235,11 +240,6 @@ Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
}
Path
-Path::GetLLVMDefaultConfigDir() {
- return Path("/etc/llvm/");
-}
-
-Path
Path::GetUserHomeDirectory() {
const char* home = getenv("HOME");
Path result;
@@ -261,7 +261,7 @@ Path::GetCurrentDirectory() {
}
#if defined(__FreeBSD__) || defined (__NetBSD__) || \
- defined(__OpenBSD__) || defined(__minix)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -313,7 +313,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return (NULL);
}
-#endif // __FreeBSD__ || __NetBSD__
+#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -330,7 +330,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
return Path(link_path);
}
#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
- defined(__OpenBSD__) || defined(__minix)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index bbbc344661be..edb101efb0f6 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -46,6 +46,11 @@
#include <limits.h>
#endif
+// For GNU Hurd
+#if defined(__GNU__) && !defined(PATH_MAX)
+# define PATH_MAX 4096
+#endif
+
using namespace llvm;
namespace {
@@ -87,7 +92,7 @@ namespace {
result.clear();
StringRef d(dir);
result.append(d.begin(), d.end());
- return success;
+ return error_code::success();
}
}
@@ -96,7 +101,12 @@ namespace sys {
namespace fs {
error_code current_path(SmallVectorImpl<char> &result) {
+#ifdef MAXPATHLEN
result.reserve(MAXPATHLEN);
+#else
+// For GNU Hurd
+ result.reserve(1024);
+#endif
while (true) {
if (::getcwd(result.data(), result.capacity()) == 0) {
@@ -110,7 +120,7 @@ error_code current_path(SmallVectorImpl<char> &result) {
}
result.set_size(strlen(result.data()));
- return success;
+ return error_code::success();
}
error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
@@ -169,7 +179,7 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
if (sz_read < 0)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code create_directory(const Twine &path, bool &existed) {
@@ -183,7 +193,7 @@ error_code create_directory(const Twine &path, bool &existed) {
} else
existed = false;
- return success;
+ return error_code::success();
}
error_code create_hard_link(const Twine &to, const Twine &from) {
@@ -196,7 +206,7 @@ error_code create_hard_link(const Twine &to, const Twine &from) {
if (::link(t.begin(), f.begin()) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code create_symlink(const Twine &to, const Twine &from) {
@@ -209,7 +219,7 @@ error_code create_symlink(const Twine &to, const Twine &from) {
if (::symlink(t.begin(), f.begin()) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code remove(const Twine &path, bool &existed) {
@@ -223,7 +233,7 @@ error_code remove(const Twine &path, bool &existed) {
} else
existed = true;
- return success;
+ return error_code::success();
}
error_code rename(const Twine &from, const Twine &to) {
@@ -245,7 +255,7 @@ error_code rename(const Twine &from, const Twine &to) {
return error_code(errno, system_category());
}
- return success;
+ return error_code::success();
}
error_code resize_file(const Twine &path, uint64_t size) {
@@ -255,7 +265,7 @@ error_code resize_file(const Twine &path, uint64_t size) {
if (::truncate(p.begin(), size) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code exists(const Twine &path, bool &result) {
@@ -270,32 +280,21 @@ error_code exists(const Twine &path, bool &result) {
} else
result = true;
- return success;
+ return error_code::success();
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toNullTerminatedStringRef(a_storage);
- StringRef b = B.toNullTerminatedStringRef(b_storage);
-
- struct stat stat_a, stat_b;
- int error_b = ::stat(b.begin(), &stat_b);
- int error_a = ::stat(a.begin(), &stat_a);
-
- // If both are invalid, it's an error. If only one is, the result is false.
- if (error_a != 0 || error_b != 0) {
- if (error_a == error_b)
- return error_code(errno, system_category());
- result = false;
- } else {
- result =
- stat_a.st_dev == stat_b.st_dev &&
- stat_a.st_ino == stat_b.st_ino;
- }
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.st_dev == B.st_dev &&
+ A.st_ino == B.st_ino;
+}
- return success;
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
}
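With equivalence now derived from file_status (st_dev/st_ino), callers can compare either two paths or two already-obtained statuses. A hedged usage sketch; the paths are placeholders and both must exist for status() to succeed.

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"

    using namespace llvm;

    int main() {
      bool Same = false;
      // Placeholder paths for illustration only.
      if (error_code ec =
              sys::fs::equivalent("/tmp/a.txt", "/tmp/also-a.txt", Same)) {
        errs() << "equivalent() failed: " << ec.message() << "\n";
        return 1;
      }
      outs() << (Same ? "same file\n" : "different files\n");
      return 0;
    }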
error_code file_size(const Twine &path, uint64_t &result) {
@@ -309,7 +308,7 @@ error_code file_size(const Twine &path, uint64_t &result) {
return make_error_code(errc::operation_not_permitted);
result = status.st_size;
- return success;
+ return error_code::success();
}
error_code status(const Twine &path, file_status &result) {
@@ -341,7 +340,10 @@ error_code status(const Twine &path, file_status &result) {
else
result = file_status(file_type::type_unknown);
- return success;
+ result.st_dev = status.st_dev;
+ result.st_ino = status.st_ino;
+
+ return error_code::success();
}
error_code unique_file(const Twine &model, int &result_fd,
@@ -436,10 +438,11 @@ rety_open_create:
result_path.append(d.begin(), d.end());
result_fd = RandomFD;
- return success;
+ return error_code::success();
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (directory == 0)
@@ -452,15 +455,15 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return directory_iterator_increment(it);
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle)
::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return success;
+ return error_code::success();
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
if (cur_dir == 0 && errno != 0) {
@@ -474,7 +477,7 @@ error_code directory_iterator_increment(directory_iterator& it) {
} else
return directory_iterator_destruct(it);
- return success;
+ return error_code::success();
}
error_code get_magic(const Twine &path, uint32_t len,
@@ -505,7 +508,7 @@ error_code get_magic(const Twine &path, uint32_t len,
}
std::fclose(file);
result.set_size(len);
- return success;
+ return error_code::success();
}
} // end namespace fs
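The renamed detail::directory_iterator_* functions above are driven by the public sys::fs::directory_iterator, whose call sites do not change. A hedged sketch of iterating a directory with that public interface; the path is a placeholder.

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"

    using namespace llvm;

    int main() {
      error_code ec;
      // A default-constructed directory_iterator is the end iterator.
      for (sys::fs::directory_iterator I("/tmp", ec), E; I != E && !ec;
           I.increment(ec))
        outs() << I->path() << "\n";
      if (ec)
        errs() << "iteration failed: " << ec.message() << "\n";
      return 0;
    }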
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index da440fd48f3d..2d7fd384e8bb 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -136,7 +136,7 @@ int Process::GetCurrentGroupId() {
return getgid();
}
-#ifdef HAVE_MACH_MACH_H
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
#include <mach/mach.h>
#endif
@@ -150,7 +150,7 @@ void Process::PreventCoreFiles() {
setrlimit(RLIMIT_CORE, &rlim);
#endif
-#ifdef HAVE_MACH_MACH_H
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
// Disable crash reporting on Mac OS X 10.0-10.4
// get information about the original set of exception ports for the task
@@ -293,7 +293,3 @@ const char *Process::OutputBold(bool bg) {
const char *Process::ResetColor() {
return "\033[0m";
}
-
-void Process::SetWorkingDirectory(std::string Path) {
- ::chdir(Path.c_str());
-}
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 346baf1744dc..e5990d06ecc2 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -412,19 +412,19 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
}
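Switching these functions from bool to error_code inverts the meaning of the return value for callers, so call sites need updating. A sketch of the new calling convention, assuming these remain static members of sys::Program as in the headers of this release:

    #include "llvm/Support/Program.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"

    using namespace llvm;

    int main() {
      // Previously: a bool where 'true' meant failure. Now: an error_code whose
      // success value means the stream was switched (or needed no change).
      if (error_code ec = sys::Program::ChangeStdoutToBinary()) {
        errs() << "could not reopen stdout in binary mode: "
               << ec.message() << "\n";
        return 1;
      }
      return 0;
    }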
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index e286869e775d..c9ec9fce9aa1 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -30,6 +30,10 @@
#include <dlfcn.h>
#include <cxxabi.h>
#endif
+#if HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
@@ -261,6 +265,22 @@ static void PrintStackTrace(void *) {
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal() {
AddSignalHandler(PrintStackTrace, 0);
+
+#if defined(__APPLE__)
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+ mach_port_t self = mach_task_self();
+
+ exception_mask_t mask = EXC_MASK_CRASH;
+
+ kern_return_t ret = task_set_exception_ports(self,
+ mask,
+ MACH_PORT_NULL,
+ EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES,
+ THREAD_STATE_NONE);
+ (void)ret;
+ }
+#endif
}
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 703448524ed9..2b250a357758 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -52,3 +52,16 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
}
#endif // !HAVE_VALGRIND_VALGRIND_H
+
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
+// These functions require no implementation, tsan just looks at the arguments
+// they're called with.
+extern "C" {
+void AnnotateHappensBefore(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateHappensAfter(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateIgnoreWritesBegin(const char *file, int line) {}
+void AnnotateIgnoreWritesEnd(const char *file, int line) {}
+}
+#endif
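These empty functions exist only so ThreadSanitizer can intercept them by name and inspect the call-site arguments. A hedged sketch of how code might call them directly is below; real callers usually go through wrapper macros, and the producer/consumer names, the Payload variable, and the synchronization hand-off are purely illustrative. The definitions come from the block above when LLVM_ENABLE_THREADS is set and NDEBUG is not.

    // Illustrative only: pairing AnnotateHappensBefore/After around a handoff
    // of 'Payload' tells tsan that the write happens-before the read.
    extern "C" {
    void AnnotateHappensBefore(const char *file, int line,
                               const volatile void *cv);
    void AnnotateHappensAfter(const char *file, int line,
                              const volatile void *cv);
    }

    static int Payload;

    void producer() {
      Payload = 42;
      AnnotateHappensBefore(__FILE__, __LINE__, &Payload);
      // ... publish to the consumer via some synchronization primitive ...
    }

    void consumer() {
      // ... wait for the producer ...
      AnnotateHappensAfter(__FILE__, __LINE__, &Payload);
      int v = Payload;
      (void)v;
    }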
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 733830e82f08..2e6d6f190370 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -17,7 +17,6 @@
using namespace llvm;
-std::string sys::getHostTriple() {
- // FIXME: Adapt to running version.
- return LLVM_HOSTTRIPLE;
+std::string sys::getDefaultTargetTriple() {
+ return LLVM_DEFAULT_TARGET_TRIPLE;
}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 42a92f9c6dfe..d8dc5226ccee 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -66,29 +66,20 @@ Path::operator=(StringRef that) {
return *this;
}
-// push_back 0 on create, and pop_back on delete.
-struct ScopedNullTerminator {
- std::string &str;
- ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
- ~ScopedNullTerminator() {
- // str.pop_back(); But wait, C++03 doesn't have this...
- assert(!str.empty() && str[str.size() - 1] == 0
- && "Null char not present!");
- str.resize(str.size() - 1);
- }
-};
-
bool
Path::isValid() const {
if (path.empty())
return false;
+ size_t len = path.size();
+ // If there is a null character, it and all its successors are ignored.
+ size_t pos = path.find_first_of('\0');
+ if (pos != std::string::npos)
+ len = pos;
+
// If there is a colon, it must be the second character, preceded by a letter
// and followed by something.
- size_t len = path.size();
- // This code assumes that path is null terminated, so make sure it is.
- ScopedNullTerminator snt(path);
- size_t pos = path.rfind(':',len);
+ pos = path.rfind(':',len);
size_t rootslash = 0;
if (pos != std::string::npos) {
if (pos != 1 || !isalpha(path[0]) || len < 3)
@@ -118,13 +109,13 @@ Path::isValid() const {
for (pos = 0; pos < len; ++pos) {
// A component may not end in a space.
if (path[pos] == ' ') {
- if (path[pos+1] == '/' || path[pos+1] == '\0')
+ if (pos+1 == len || path[pos+1] == '/' || path[pos+1] == '\0')
return false;
}
// A component may not end in a period.
if (path[pos] == '.') {
- if (path[pos+1] == '/' || path[pos+1] == '\0') {
+ if (pos+1 == len || path[pos+1] == '/') {
// Unless it is the pseudo-directory "."...
if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
return true;
@@ -286,14 +277,6 @@ Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
}
Path
-Path::GetLLVMDefaultConfigDir() {
- Path ret = GetUserHomeDirectory();
- if (!ret.appendComponent(".llvm"))
- assert(0 && "Failed to append .llvm");
- return ret;
-}
-
-Path
Path::GetUserHomeDirectory() {
char buff[MAX_PATH];
HRESULT res = SHGetFolderPathA(NULL,
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index bc597b2dcc89..e9ce5d9097a3 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <wincrypt.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
@@ -63,7 +62,7 @@ namespace {
utf16.push_back(0);
utf16.pop_back();
- return success;
+ return error_code::success();
}
error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
@@ -93,7 +92,7 @@ namespace {
utf8.push_back(0);
utf8.pop_back();
- return success;
+ return error_code::success();
}
error_code TempDir(SmallVectorImpl<wchar_t> &result) {
@@ -109,17 +108,9 @@ namespace {
}
result.set_size(len);
- return success;
+ return error_code::success();
}
- // Forwarder for ScopedHandle.
- BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
- return ::CryptReleaseContext(Provider, 0);
- }
-
- typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
- BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
- ScopedCryptContext;
bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -176,7 +167,7 @@ retry_cur_dir:
if (len == 0)
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
@@ -199,7 +190,7 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
if (res == 0)
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code create_directory(const Twine &path, bool &existed) {
@@ -219,7 +210,7 @@ error_code create_directory(const Twine &path, bool &existed) {
} else
existed = false;
- return success;
+ return error_code::success();
}
error_code create_hard_link(const Twine &to, const Twine &from) {
@@ -238,7 +229,7 @@ error_code create_hard_link(const Twine &to, const Twine &from) {
if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code create_symlink(const Twine &to, const Twine &from) {
@@ -261,7 +252,7 @@ error_code create_symlink(const Twine &to, const Twine &from) {
if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code remove(const Twine &path, bool &existed) {
@@ -294,7 +285,7 @@ error_code remove(const Twine &path, bool &existed) {
existed = true;
}
- return success;
+ return error_code::success();
}
error_code rename(const Twine &from, const Twine &to) {
@@ -314,7 +305,7 @@ error_code rename(const Twine &from, const Twine &to) {
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code resize_file(const Twine &path, uint64_t size) {
@@ -356,72 +347,26 @@ error_code exists(const Twine &path, bool &result) {
result = false;
} else
result = true;
- return success;
+ return error_code::success();
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toStringRef(a_storage);
- StringRef b = B.toStringRef(b_storage);
-
- // Convert to utf-16.
- SmallVector<wchar_t, 128> wide_a;
- SmallVector<wchar_t, 128> wide_b;
- if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
- if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
-
- AutoHandle HandleB(
- ::CreateFileW(wide_b.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- AutoHandle HandleA(
- ::CreateFileW(wide_a.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- // If both handles are invalid, it's an error.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE)
- return windows_error(::GetLastError());
-
- // If only one is invalid, it's false.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE) {
- result = false;
- return success;
- }
-
- // Get file information.
- BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
- if (!::GetFileInformationByHandle(HandleA, &InfoA))
- return windows_error(::GetLastError());
- if (!::GetFileInformationByHandle(HandleB, &InfoB))
- return windows_error(::GetLastError());
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.FileIndexHigh == B.FileIndexHigh &&
+ A.FileIndexLow == B.FileIndexLow &&
+ A.FileSizeHigh == B.FileSizeHigh &&
+ A.FileSizeLow == B.FileSizeLow &&
+ A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
+ A.LastWriteTimeLow == B.LastWriteTimeLow &&
+ A.VolumeSerialNumber == B.VolumeSerialNumber;
+}
- // See if it's all the same.
- result =
- InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
- InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
- InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
- InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
- InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
- InfoA.ftLastWriteTime.dwLowDateTime ==
- InfoB.ftLastWriteTime.dwLowDateTime &&
- InfoA.ftLastWriteTime.dwHighDateTime ==
- InfoB.ftLastWriteTime.dwHighDateTime;
-
- return success;
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
}
error_code file_size(const Twine &path, uint64_t &result) {
@@ -442,7 +387,7 @@ error_code file_size(const Twine &path, uint64_t &result) {
(uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ FileData.nFileSizeLow;
- return success;
+ return error_code::success();
}
static bool isReservedName(StringRef path) {
@@ -475,11 +420,10 @@ error_code status(const Twine &path, file_status &result) {
StringRef path8 = path.toStringRef(path_storage);
if (isReservedName(path8)) {
result = file_status(file_type::character_file);
- return success;
+ return error_code::success();
}
- if (error_code ec = UTF8ToUTF16(path8,
- path_utf16))
+ if (error_code ec = UTF8ToUTF16(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -488,7 +432,7 @@ error_code status(const Twine &path, file_status &result) {
// Handle reparse points.
if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
- AutoHandle h(
+ ScopedFileHandle h(
::CreateFileW(path_utf16.begin(),
0, // Attributes only.
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -496,16 +440,37 @@ error_code status(const Twine &path, file_status &result) {
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
0));
- if (h == INVALID_HANDLE_VALUE)
+ if (!h)
goto handle_status_error;
}
if (attr & FILE_ATTRIBUTE_DIRECTORY)
result = file_status(file_type::directory_file);
- else
+ else {
result = file_status(file_type::regular_file);
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ BY_HANDLE_FILE_INFORMATION Info;
+ if (!::GetFileInformationByHandle(h, &Info))
+ goto handle_status_error;
+ result.FileIndexHigh = Info.nFileIndexHigh;
+ result.FileIndexLow = Info.nFileIndexLow;
+ result.FileSizeHigh = Info.nFileSizeHigh;
+ result.FileSizeLow = Info.nFileSizeLow;
+ result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime;
+ result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime;
+ result.VolumeSerialNumber = Info.dwVolumeSerialNumber;
+ }
- return success;
+ return error_code::success();
handle_status_error:
error_code ec = windows_error(::GetLastError());
@@ -519,7 +484,7 @@ handle_status_error:
return ec;
}
- return success;
+ return error_code::success();
}
error_code unique_file(const Twine &model, int &result_fd,
@@ -535,7 +500,7 @@ error_code unique_file(const Twine &model, int &result_fd,
if (makeAbsolute) {
// Make model absolute by prepending a temp directory if it's not already.
bool absolute = path::is_absolute(m);
-
+
if (!absolute) {
SmallVector<wchar_t, 64> temp_dir;
if (error_code ec = TempDir(temp_dir)) return ec;
@@ -646,7 +611,7 @@ retry_create_file:
}
result_fd = fd;
- return success;
+ return error_code::success();
}
error_code get_magic(const Twine &path, uint32_t len,
@@ -688,10 +653,11 @@ error_code get_magic(const Twine &path, uint32_t len,
}
result.set_size(len);
- return success;
+ return error_code::success();
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallVector<wchar_t, 128> path_utf16;
if (error_code ec = UTF8ToUTF16(path,
@@ -722,7 +688,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
@@ -739,25 +705,25 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
path::append(directory_entry_path, directory_entry_name_utf8.str());
it.CurrentEntry = directory_entry(directory_entry_path.str());
- return success;
+ return error_code::success();
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle != 0)
// Closes the handle if it's valid.
ScopedFindHandle close(HANDLE(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return success;
+ return error_code::success();
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
WIN32_FIND_DATAW FindData;
if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
}
@@ -774,7 +740,7 @@ error_code directory_iterator_increment(directory_iterator& it) {
return ec;
it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
- return success;
+ return error_code::success();
}
} // end namespace fs
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index fe54eb1a7972..913b0734ddc9 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -220,8 +220,4 @@ const char *Process::ResetColor() {
return 0;
}
-void Process::SetWorkingDirectory(std::string Path) {
- ::_chdir(Path.c_str());
-}
-
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index e486e6ec2381..80ccaa6ea6b1 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -299,14 +299,14 @@ Program::Execute(const Path& path,
Data_ = wpi;
// Make sure these get closed no matter what.
- AutoHandle hThread(pi.hThread);
+ ScopedCommonHandle hThread(pi.hThread);
// Assign the process to a job if a memory limit is defined.
- AutoHandle hJob(0);
+ ScopedJobHandle hJob;
if (memoryLimit != 0) {
hJob = CreateJobObject(0, 0);
bool success = false;
- if (hJob != 0) {
+ if (hJob) {
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
memset(&jeli, 0, sizeof(jeli));
jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
@@ -367,7 +367,17 @@ Program::Wait(const Path &path,
return -2;
}
- return status;
+ if (!status)
+ return 0;
+
+ // Pass 10 (Warning) and 11 (Error) to the callee as a negative value.
+ if ((status & 0xBFFF0000U) == 0x80000000U)
+ return (int)status;
+
+ if (status & 0xFF)
+ return status & 0x7FFFFFFF;
+
+ return 1;
}
bool
@@ -387,19 +397,25 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
int result = _setmode( _fileno(stdout), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
int result = _setmode( _fileno(stderr), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
}
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 0d4b8a26b023..38308f6abd85 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -239,7 +239,7 @@ static void RegisterHandler() {
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
// Environment variable to disable any kind of crash dialog.
- if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
#ifdef _MSC_VER
_CrtSetReportHook(CRTReportHook);
#endif
@@ -446,7 +446,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
}
if (ExitOnUnhandledExceptions)
- _exit(-3);
+ _exit(ep->ExceptionRecord->ExceptionCode);
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 67b6f015114f..5c1da0d617aa 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -26,6 +26,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include <windows.h>
+#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
@@ -41,70 +42,99 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
return true;
}
-class AutoHandle {
- HANDLE handle;
+template <typename HandleTraits>
+class ScopedHandle {
+ typedef typename HandleTraits::handle_type handle_type;
+ handle_type Handle;
+ ScopedHandle(const ScopedHandle &other); // = delete;
+ void operator=(const ScopedHandle &other); // = delete;
public:
- AutoHandle(HANDLE h) : handle(h) {}
+ ScopedHandle()
+ : Handle(HandleTraits::GetInvalid()) {}
+
+ explicit ScopedHandle(handle_type h)
+ : Handle(h) {}
- ~AutoHandle() {
- if (handle)
- CloseHandle(handle);
+ ~ScopedHandle() {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
}
- operator HANDLE() {
- return handle;
+ handle_type take() {
+ handle_type t = Handle;
+ Handle = HandleTraits::GetInvalid();
+ return t;
}
- AutoHandle &operator=(HANDLE h) {
- handle = h;
+ ScopedHandle &operator=(handle_type h) {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ Handle = h;
return *this;
}
+
+ // True if Handle is valid.
+ operator bool() const {
+ return HandleTraits::IsValid(Handle) ? true : false;
+ }
+
+ operator handle_type() const {
+ return Handle;
+ }
};
-template <class HandleType, uintptr_t InvalidHandle,
- class DeleterType, DeleterType D>
-class ScopedHandle {
- HandleType Handle;
+struct CommonHandleTraits {
+ typedef HANDLE handle_type;
-public:
- ScopedHandle() : Handle(InvalidHandle) {}
- ScopedHandle(HandleType handle) : Handle(handle) {}
+ static handle_type GetInvalid() {
+ return INVALID_HANDLE_VALUE;
+ }
- ~ScopedHandle() {
- if (Handle != HandleType(InvalidHandle))
- D(Handle);
+ static void Close(handle_type h) {
+ ::CloseHandle(h);
}
- HandleType take() {
- HandleType temp = Handle;
- Handle = HandleType(InvalidHandle);
- return temp;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- operator HandleType() const { return Handle; }
+struct JobHandleTraits : CommonHandleTraits {
+ static handle_type GetInvalid() {
+ return NULL;
+ }
+};
- ScopedHandle &operator=(HandleType handle) {
- Handle = handle;
- return *this;
+struct CryptContextTraits : CommonHandleTraits {
+ typedef HCRYPTPROV handle_type;
+
+ static handle_type GetInvalid() {
+ return 0;
}
- typedef void (*unspecified_bool_type)();
- static void unspecified_bool_true() {}
+ static void Close(handle_type h) {
+ ::CryptReleaseContext(h, 0);
+ }
- // True if Handle is valid.
- operator unspecified_bool_type() const {
- return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- bool operator!() const {
- return Handle == HandleType(InvalidHandle);
+struct FindHandleTraits : CommonHandleTraits {
+ static void Close(handle_type h) {
+ ::FindClose(h);
}
};
-typedef ScopedHandle<HANDLE, uintptr_t(-1),
- BOOL (WINAPI*)(HANDLE), ::FindClose>
- ScopedFindHandle;
+struct FileHandleTraits : CommonHandleTraits {};
+
+typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle;
+typedef ScopedHandle<FileHandleTraits> ScopedFileHandle;
+typedef ScopedHandle<CryptContextTraits> ScopedCryptContext;
+typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
+typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
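The traits-based ScopedHandle makes adding an RAII wrapper for a new kind of Windows handle a matter of defining one small traits struct. A hedged sketch for a hypothetical module-handle wrapper follows; ModuleHandleTraits is not part of this patch, and the snippet assumes <windows.h> plus the ScopedHandle template defined in this header.

    // Hypothetical traits for HMODULE handles returned by ::LoadLibraryW,
    // following the CommonHandleTraits pattern introduced above.
    struct ModuleHandleTraits {
      typedef HMODULE handle_type;
      static handle_type GetInvalid() { return NULL; }
      static void Close(handle_type h) { ::FreeLibrary(h); }
      static bool IsValid(handle_type h) { return h != GetInvalid(); }
    };

    typedef ScopedHandle<ModuleHandleTraits> ScopedModuleHandle;

    // Usage sketch: the module is released automatically when 'M' leaves scope.
    //   ScopedModuleHandle M(::LoadLibraryW(L"dbghelp.dll"));
    //   if (M) { /* ... ::GetProcAddress(M, "SymInitialize") ... */ }

Compared with the old non-template AutoHandle, the invalid value and the close routine now live in one place per handle kind, so mismatches such as closing a FindFirstFile handle with CloseHandle are caught by construction.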
namespace llvm {
template <class T>
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
new file mode 100644
index 000000000000..330519f3019d
--- /dev/null
+++ b/lib/Support/YAMLParser.cpp
@@ -0,0 +1,2117 @@
+//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a YAML parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLParser.h"
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace yaml;
+
+enum UnicodeEncodingForm {
+ UEF_UTF32_LE, //< UTF-32 Little Endian
+ UEF_UTF32_BE, //< UTF-32 Big Endian
+ UEF_UTF16_LE, //< UTF-16 Little Endian
+ UEF_UTF16_BE, //< UTF-16 Big Endian
+ UEF_UTF8, //< UTF-8 or ascii.
+ UEF_Unknown //< Not a valid Unicode encoding.
+};
+
+/// EncodingInfo - Holds the encoding type and length of the byte order mark if
+/// it exists. Length is in {0, 2, 3, 4}.
+typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+
+/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
+/// encoding form of \a Input.
+///
+/// @param Input A string of length 0 or more.
+/// @returns An EncodingInfo indicating the Unicode encoding form of the input
+/// and how long the byte order mark is if one exists.
+static EncodingInfo getUnicodeEncoding(StringRef Input) {
+ if (Input.size() == 0)
+ return std::make_pair(UEF_Unknown, 0);
+
+ switch (uint8_t(Input[0])) {
+ case 0x00:
+ if (Input.size() >= 4) {
+ if ( Input[1] == 0
+ && uint8_t(Input[2]) == 0xFE
+ && uint8_t(Input[3]) == 0xFF)
+ return std::make_pair(UEF_UTF32_BE, 4);
+ if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
+ return std::make_pair(UEF_UTF32_BE, 0);
+ }
+
+ if (Input.size() >= 2 && Input[1] != 0)
+ return std::make_pair(UEF_UTF16_BE, 0);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFF:
+ if ( Input.size() >= 4
+ && uint8_t(Input[1]) == 0xFE
+ && Input[2] == 0
+ && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 4);
+
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
+ return std::make_pair(UEF_UTF16_LE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFE:
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
+ return std::make_pair(UEF_UTF16_BE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xEF:
+ if ( Input.size() >= 3
+ && uint8_t(Input[1]) == 0xBB
+ && uint8_t(Input[2]) == 0xBF)
+ return std::make_pair(UEF_UTF8, 3);
+ return std::make_pair(UEF_Unknown, 0);
+ }
+
+ // It could still be utf-32 or utf-16.
+ if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 0);
+
+ if (Input.size() >= 2 && Input[1] == 0)
+ return std::make_pair(UEF_UTF16_LE, 0);
+
+ return std::make_pair(UEF_UTF8, 0);
+}
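+
+// Usage sketch (hypothetical caller; the variable names are illustrative):
+// the second member of the returned EncodingInfo is the BOM length, so a
+// caller can strip a recognized byte order mark before scanning:
+//
+//   StringRef Buf = ...;                        // raw input bytes
+//   EncodingInfo EI = getUnicodeEncoding(Buf);
+//   if (EI.first == UEF_UTF8)
+//     Buf = Buf.substr(EI.second);              // skip the 0 or 3 byte BOM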
+
+namespace llvm {
+namespace yaml {
+/// Token - A single YAML token.
+struct Token : ilist_node<Token> {
+ enum TokenKind {
+ TK_Error, // Uninitialized token.
+ TK_StreamStart,
+ TK_StreamEnd,
+ TK_VersionDirective,
+ TK_TagDirective,
+ TK_DocumentStart,
+ TK_DocumentEnd,
+ TK_BlockEntry,
+ TK_BlockEnd,
+ TK_BlockSequenceStart,
+ TK_BlockMappingStart,
+ TK_FlowEntry,
+ TK_FlowSequenceStart,
+ TK_FlowSequenceEnd,
+ TK_FlowMappingStart,
+ TK_FlowMappingEnd,
+ TK_Key,
+ TK_Value,
+ TK_Scalar,
+ TK_Alias,
+ TK_Anchor,
+ TK_Tag
+ } Kind;
+
+ /// A string of length 0 or more whose begin() points to the logical location
+ /// of the token in the input.
+ StringRef Range;
+
+ Token() : Kind(TK_Error) {}
+};
+}
+}
+
+namespace llvm {
+template<>
+struct ilist_sentinel_traits<Token> {
+ Token *createSentinel() const {
+ return &Sentinel;
+ }
+ static void destroySentinel(Token*) {}
+
+ Token *provideInitialHead() const { return createSentinel(); }
+ Token *ensureHead(Token*) const { return createSentinel(); }
+ static void noteHead(Token*, Token*) {}
+
+private:
+ mutable Token Sentinel;
+};
+
+template<>
+struct ilist_node_traits<Token> {
+ Token *createNode(const Token &V) {
+ return new (Alloc.Allocate<Token>()) Token(V);
+ }
+ static void deleteNode(Token *V) {}
+
+ void addNodeToList(Token *) {}
+ void removeNodeFromList(Token *) {}
+ void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
+ ilist_iterator<Token> /*first*/,
+ ilist_iterator<Token> /*last*/) {}
+
+ BumpPtrAllocator Alloc;
+};
+}
+
+typedef ilist<Token> TokenQueueT;
+
+namespace {
+/// @brief This struct is used to track simple keys.
+///
+/// Simple keys are handled by creating an entry in SimpleKeys for each Token
+/// which could legally be the start of a simple key. When peekNext is called,
+/// if the Token to be returned is referenced by a SimpleKey, we continue
+/// tokenizing until that potential simple key has either been found not to be
+/// a simple key (we moved on to the next line or went further than 1024 chars),
+/// or until we run into a Value, at which point we insert a Key token (and
+/// possibly others) before the SimpleKey's Tok.
+struct SimpleKey {
+ TokenQueueT::iterator Tok;
+ unsigned Column;
+ unsigned Line;
+ unsigned FlowLevel;
+ bool IsRequired;
+
+ bool operator ==(const SimpleKey &Other) {
+ return Tok == Other.Tok;
+ }
+};
+}
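+
+// Example (illustrative): in the document "foo: bar" the plain scalar "foo"
+// is a simple key. No explicit '?' indicator precedes it, so the scanner only
+// learns it was a key when the following ':' is seen; at that point a Key
+// token is inserted in front of the queued Scalar token for "foo".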
+
+/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
+/// subsequence and the subsequence's length in code units (uint8_t).
+/// A length of 0 represents an error.
+typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+
+static UTF8Decoded decodeUTF8(StringRef Range) {
+ StringRef::iterator Position = Range.begin();
+ StringRef::iterator End = Range.end();
+ // 1 byte: [0x00, 0x7f]
+ // Bit pattern: 0xxxxxxx
+ if ((*Position & 0x80) == 0) {
+ return std::make_pair(*Position, 1);
+ }
+ // 2 bytes: [0x80, 0x7ff]
+ // Bit pattern: 110xxxxx 10xxxxxx
+ if (Position + 1 != End &&
+ ((*Position & 0xE0) == 0xC0) &&
+ ((*(Position + 1) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x1F) << 6) |
+ (*(Position + 1) & 0x3F);
+ if (codepoint >= 0x80)
+ return std::make_pair(codepoint, 2);
+ }
+ // 3 bytes: [0x800, 0xffff]
+ // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
+ if (Position + 2 != End &&
+ ((*Position & 0xF0) == 0xE0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x0F) << 12) |
+ ((*(Position + 1) & 0x3F) << 6) |
+ (*(Position + 2) & 0x3F);
+ // Codepoints between 0xD800 and 0xDFFF are invalid, as
+ // they are high / low surrogate halves used by UTF-16.
+ if (codepoint >= 0x800 &&
+ (codepoint < 0xD800 || codepoint > 0xDFFF))
+ return std::make_pair(codepoint, 3);
+ }
+ // 4 bytes: [0x10000, 0x10FFFF]
+ // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ if (Position + 3 != End &&
+ ((*Position & 0xF8) == 0xF0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80) &&
+ ((*(Position + 3) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x07) << 18) |
+ ((*(Position + 1) & 0x3F) << 12) |
+ ((*(Position + 2) & 0x3F) << 6) |
+ (*(Position + 3) & 0x3F);
+ if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
+ return std::make_pair(codepoint, 4);
+ }
+ return std::make_pair(0, 0);
+}
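+
+// Worked example (values verifiable from the bit patterns above): for the
+// two-byte sequence 0xC3 0xA9 (UTF-8 for U+00E9), decodeUTF8 returns
+// {0xE9, 2}; for a lone continuation byte such as 0x80 it returns {0, 0},
+// signalling an ill-formed subsequence:
+//
+//   UTF8Decoded D = decodeUTF8(StringRef("\xC3\xA9"));
+//   // D.first == 0xE9, D.second == 2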
+
+namespace llvm {
+namespace yaml {
+/// @brief Scans YAML tokens from a MemoryBuffer.
+class Scanner {
+public:
+ Scanner(const StringRef Input, SourceMgr &SM);
+
+ /// @brief Parse the next token and return it without popping it.
+ Token &peekNext();
+
+ /// @brief Parse the next token and pop it from the queue.
+ Token getNext();
+
+ void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ SM.PrintMessage(Loc, Kind, Message, Ranges);
+ }
+
+ void setError(const Twine &Message, StringRef::iterator Position) {
+ if (Current >= End)
+ Current = End - 1;
+
+ // Don't print out more errors after the first one we encounter. The rest
+ // are just the result of the first, and have no meaning.
+ if (!Failed)
+ printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
+ Failed = true;
+ }
+
+ void setError(const Twine &Message) {
+ setError(Message, Current);
+ }
+
+ /// @brief Returns true if an error occurred while parsing.
+ bool failed() {
+ return Failed;
+ }
+
+private:
+ StringRef currentInput() {
+ return StringRef(Current, End - Current);
+ }
+
+ /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
+ /// at \a Position.
+ ///
+ /// If the UTF-8 code units starting at Position do not form a well-formed
+ /// code unit subsequence, then the Unicode scalar value is 0, and the length
+ /// is 0.
+ UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+ return ::decodeUTF8(StringRef(Position, End - Position));
+ }
+
+ // The following functions are based on the grammar rules in the YAML spec. The
+ // style of the function names is meant to closely match how they are written
+ // in the spec. The number within the [] is the number of the grammar rule in
+ // the spec.
+ //
+ // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+ //
+ // c-
+ // A production starting and ending with a special character.
+ // b-
+ // A production matching a single line break.
+ // nb-
+ // A production starting and ending with a non-break character.
+ // s-
+ // A production starting and ending with a white space character.
+ // ns-
+ // A production starting and ending with a non-space character.
+ // l-
+ // A production matching complete line(s).
+
+ /// @brief Skip a single nb-char[27] starting at Position.
+ ///
+ /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+ /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+ ///
+ /// @returns The code unit after the nb-char, or Position if it's not an
+ /// nb-char.
+ StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+ /// @brief Skip a single b-break[28] starting at Position.
+ ///
+ /// A b-break is 0xD 0xA | 0xD | 0xA
+ ///
+ /// @returns The code unit after the b-break, or Position if it's not a
+ /// b-break.
+ StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+ /// @brief Skip a single s-white[33] starting at Position.
+ ///
+ /// A s-white is 0x20 | 0x9
+ ///
+ /// @returns The code unit after the s-white, or Position if it's not a
+ /// s-white.
+ StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+ /// @brief Skip a single ns-char[34] starting at Position.
+ ///
+ /// A ns-char is nb-char - s-white
+ ///
+ /// @returns The code unit after the ns-char, or Position if it's not a
+ /// ns-char.
+ StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+ typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+ /// @brief Skip minimal well-formed code unit subsequences until Func
+ /// returns its input.
+ ///
+ /// @returns The code unit after the last minimal well-formed code unit
+ /// subsequence that Func accepted.
+ StringRef::iterator skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position);
+
+ /// @brief Scan ns-uri-char[39]s starting at Current.
+ ///
+ /// This updates Current and Column while scanning.
+ ///
+ /// @returns A StringRef starting at Current which covers the longest contiguous
+ /// sequence of ns-uri-char.
+ StringRef scan_ns_uri_char();
+
+ /// @brief Scan ns-plain-one-line[133] starting at \a Current.
+ StringRef scan_ns_plain_one_line();
+
+ /// @brief Consume a minimal well-formed code unit subsequence starting at
+ /// \a Current. Return false if it is not the same Unicode scalar value as
+ /// \a Expected. This updates \a Column.
+ bool consume(uint32_t Expected);
+
+ /// @brief Skip \a Distance UTF-8 code units. Updates \a Current and \a Column.
+ void skip(uint32_t Distance);
+
+ /// @brief Return true if the minimal well-formed code unit subsequence at
+ /// Position is whitespace or a new line.
+ bool isBlankOrBreak(StringRef::iterator Position);
+
+ /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+ void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired);
+
+ /// @brief Remove simple keys that can no longer be valid simple keys.
+ ///
+ /// Invalid simple keys are not on the current line or are further than 1024
+ /// columns back.
+ void removeStaleSimpleKeyCandidates();
+
+ /// @brief Remove all simple keys on FlowLevel \a Level.
+ void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
+
+ /// @brief Unroll indentation in \a Indents back to \a ToColumn. Creates BlockEnd
+ /// tokens if needed.
+ bool unrollIndent(int ToColumn);
+
+ /// @brief Increase indent to \a ToColumn. Creates \a Kind token at \a InsertPoint
+ /// if needed.
+ bool rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint);
+
+ /// @brief Skip whitespace and comments until the start of the next token.
+ void scanToNextToken();
+
+ /// @brief Must be the first token generated.
+ bool scanStreamStart();
+
+ /// @brief Generate tokens needed to close out the stream.
+ bool scanStreamEnd();
+
+ /// @brief Scan a %BLAH directive.
+ bool scanDirective();
+
+ /// @brief Scan a ... or ---.
+ bool scanDocumentIndicator(bool IsStart);
+
+ /// @brief Scan a [ or { and generate the proper flow collection start token.
+ bool scanFlowCollectionStart(bool IsSequence);
+
+ /// @brief Scan a ] or } and generate the proper flow collection end token.
+ bool scanFlowCollectionEnd(bool IsSequence);
+
+ /// @brief Scan the , that separates entries in a flow collection.
+ bool scanFlowEntry();
+
+ /// @brief Scan the - that starts block sequence entries.
+ bool scanBlockEntry();
+
+ /// @brief Scan an explicit ? indicating a key.
+ bool scanKey();
+
+ /// @brief Scan an explicit : indicating a value.
+ bool scanValue();
+
+ /// @brief Scan a quoted scalar.
+ bool scanFlowScalar(bool IsDoubleQuoted);
+
+ /// @brief Scan an unquoted scalar.
+ bool scanPlainScalar();
+
+ /// @brief Scan an Alias or Anchor starting with * or &.
+ bool scanAliasOrAnchor(bool IsAlias);
+
+ /// @brief Scan a block scalar starting with | or >.
+ bool scanBlockScalar(bool IsLiteral);
+
+ /// @brief Scan a tag of the form !stuff.
+ bool scanTag();
+
+ /// @brief Dispatch to the next scanning function based on \a *Current.
+ bool fetchMoreTokens();
+
+ /// @brief The SourceMgr used for diagnostics and buffer management.
+ SourceMgr &SM;
+
+ /// @brief The original input.
+ MemoryBuffer *InputBuffer;
+
+ /// @brief The current position of the scanner.
+ StringRef::iterator Current;
+
+ /// @brief The end of the input (one past the last character).
+ StringRef::iterator End;
+
+ /// @brief Current YAML indentation level in spaces.
+ int Indent;
+
+ /// @brief Current column number in Unicode code points.
+ unsigned Column;
+
+ /// @brief Current line number.
+ unsigned Line;
+
+ /// @brief How deep we are in flow style containers. 0 means at block level.
+ unsigned FlowLevel;
+
+ /// @brief Are we at the start of the stream?
+ bool IsStartOfStream;
+
+ /// @brief Can the next token be the start of a simple key?
+ bool IsSimpleKeyAllowed;
+
+ /// @brief Is the next token required to start a simple key?
+ bool IsSimpleKeyRequired;
+
+ /// @brief True if an error has occurred.
+ bool Failed;
+
+ /// @brief Queue of tokens. This is required to queue up tokens while looking
+ /// for the end of a simple key, and for cases where a single character
+ /// can produce multiple tokens (e.g. BlockEnd).
+ TokenQueueT TokenQueue;
+
+ /// @brief Indentation levels.
+ SmallVector<int, 4> Indents;
+
+ /// @brief Potential simple keys.
+ SmallVector<SimpleKey, 4> SimpleKeys;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
+static void encodeUTF8( uint32_t UnicodeScalarValue
+ , SmallVectorImpl<char> &Result) {
+ if (UnicodeScalarValue <= 0x7F) {
+ Result.push_back(UnicodeScalarValue & 0x7F);
+ } else if (UnicodeScalarValue <= 0x7FF) {
+ uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
+ uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ } else if (UnicodeScalarValue <= 0xFFFF) {
+ uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ } else if (UnicodeScalarValue <= 0x10FFFF) {
+ uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
+ uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ Result.push_back(FourthByte);
+ }
+}
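+
+// Worked example: U+00E9 falls in the two-byte range, so encodeUTF8 appends
+// 0xC3 0xA9, the inverse of the decodeUTF8 example above:
+//
+//   SmallString<4> Out;
+//   encodeUTF8(0xE9, Out);   // Out now holds "\xC3\xA9"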
+
+bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
+ SourceMgr SM;
+ Scanner scanner(Input, SM);
+ while (true) {
+ Token T = scanner.getNext();
+ switch (T.Kind) {
+ case Token::TK_StreamStart:
+ OS << "Stream-Start: ";
+ break;
+ case Token::TK_StreamEnd:
+ OS << "Stream-End: ";
+ break;
+ case Token::TK_VersionDirective:
+ OS << "Version-Directive: ";
+ break;
+ case Token::TK_TagDirective:
+ OS << "Tag-Directive: ";
+ break;
+ case Token::TK_DocumentStart:
+ OS << "Document-Start: ";
+ break;
+ case Token::TK_DocumentEnd:
+ OS << "Document-End: ";
+ break;
+ case Token::TK_BlockEntry:
+ OS << "Block-Entry: ";
+ break;
+ case Token::TK_BlockEnd:
+ OS << "Block-End: ";
+ break;
+ case Token::TK_BlockSequenceStart:
+ OS << "Block-Sequence-Start: ";
+ break;
+ case Token::TK_BlockMappingStart:
+ OS << "Block-Mapping-Start: ";
+ break;
+ case Token::TK_FlowEntry:
+ OS << "Flow-Entry: ";
+ break;
+ case Token::TK_FlowSequenceStart:
+ OS << "Flow-Sequence-Start: ";
+ break;
+ case Token::TK_FlowSequenceEnd:
+ OS << "Flow-Sequence-End: ";
+ break;
+ case Token::TK_FlowMappingStart:
+ OS << "Flow-Mapping-Start: ";
+ break;
+ case Token::TK_FlowMappingEnd:
+ OS << "Flow-Mapping-End: ";
+ break;
+ case Token::TK_Key:
+ OS << "Key: ";
+ break;
+ case Token::TK_Value:
+ OS << "Value: ";
+ break;
+ case Token::TK_Scalar:
+ OS << "Scalar: ";
+ break;
+ case Token::TK_Alias:
+ OS << "Alias: ";
+ break;
+ case Token::TK_Anchor:
+ OS << "Anchor: ";
+ break;
+ case Token::TK_Tag:
+ OS << "Tag: ";
+ break;
+ case Token::TK_Error:
+ break;
+ }
+ OS << T.Range << "\n";
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+bool yaml::scanTokens(StringRef Input) {
+ llvm::SourceMgr SM;
+ llvm::yaml::Scanner scanner(Input, SM);
+ for (;;) {
+ llvm::yaml::Token T = scanner.getNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
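+
+// Usage sketch (hypothetical driver code): both entry points take the YAML
+// text directly, so a minimal tokenizer check looks like:
+//
+//   StringRef YAML = "foo: [bar, baz]";
+//   if (!llvm::yaml::scanTokens(YAML))
+//     errs() << "tokenization failed\n";
+//   llvm::yaml::dumpTokens(YAML, outs());   // prints one line per token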
+
+std::string yaml::escape(StringRef Input) {
+ std::string EscapedInput;
+ for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
+ if (*i == '\\')
+ EscapedInput += "\\\\";
+ else if (*i == '"')
+ EscapedInput += "\\\"";
+ else if (*i == 0)
+ EscapedInput += "\\0";
+ else if (*i == 0x07)
+ EscapedInput += "\\a";
+ else if (*i == 0x08)
+ EscapedInput += "\\b";
+ else if (*i == 0x09)
+ EscapedInput += "\\t";
+ else if (*i == 0x0A)
+ EscapedInput += "\\n";
+ else if (*i == 0x0B)
+ EscapedInput += "\\v";
+ else if (*i == 0x0C)
+ EscapedInput += "\\f";
+ else if (*i == 0x0D)
+ EscapedInput += "\\r";
+ else if (*i == 0x1B)
+ EscapedInput += "\\e";
+ else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
+ std::string HexStr = utohexstr(*i);
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
+ UTF8Decoded UnicodeScalarValue
+ = decodeUTF8(StringRef(i, Input.end() - i));
+ if (UnicodeScalarValue.second == 0) {
+ // Found invalid char.
+ SmallString<4> Val;
+ encodeUTF8(0xFFFD, Val);
+ EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
+ // FIXME: Error reporting.
+ return EscapedInput;
+ }
+ if (UnicodeScalarValue.first == 0x85)
+ EscapedInput += "\\N";
+ else if (UnicodeScalarValue.first == 0xA0)
+ EscapedInput += "\\_";
+ else if (UnicodeScalarValue.first == 0x2028)
+ EscapedInput += "\\L";
+ else if (UnicodeScalarValue.first == 0x2029)
+ EscapedInput += "\\P";
+ else {
+ std::string HexStr = utohexstr(UnicodeScalarValue.first);
+ if (HexStr.size() <= 2)
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 4)
+ EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 8)
+ EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
+ }
+ i += UnicodeScalarValue.second - 1;
+ } else
+ EscapedInput.push_back(*i);
+ }
+ return EscapedInput;
+}
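+
+// Example behaviour (each value follows directly from the branches above):
+//
+//   yaml::escape("a\tb");          // returns "a\\tb"
+//   yaml::escape("say \"hi\"");    // returns "say \\\"hi\\\""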
+
+Scanner::Scanner(StringRef Input, SourceMgr &sm)
+ : SM(sm)
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , IsSimpleKeyRequired(false)
+ , Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+ Current = InputBuffer->getBufferStart();
+ End = InputBuffer->getBufferEnd();
+}
+
+Token &Scanner::peekNext() {
+ // If the current token is a possible simple key, keep parsing until we
+ // can confirm.
+ bool NeedMore = false;
+ while (true) {
+ if (TokenQueue.empty() || NeedMore) {
+ if (!fetchMoreTokens()) {
+ TokenQueue.clear();
+ TokenQueue.push_back(Token());
+ return TokenQueue.front();
+ }
+ }
+ assert(!TokenQueue.empty() &&
+ "fetchMoreTokens lied about getting tokens!");
+
+ removeStaleSimpleKeyCandidates();
+ SimpleKey SK;
+ SK.Tok = TokenQueue.front();
+ if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
+ == SimpleKeys.end())
+ break;
+ else
+ NeedMore = true;
+ }
+ return TokenQueue.front();
+}
+
+Token Scanner::getNext() {
+ Token Ret = peekNext();
+ // TokenQueue can be empty if there was an error getting the next token.
+ if (!TokenQueue.empty())
+ TokenQueue.pop_front();
+
+ // There cannot be any referenced Tokens if the TokenQueue is empty. So do a
+ // quick deallocation of them all.
+ if (TokenQueue.empty()) {
+ TokenQueue.Alloc.Reset();
+ }
+
+ return Ret;
+}
+
+StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ // Check 7 bit c-printable - b-char.
+ if ( *Position == 0x09
+ || (*Position >= 0x20 && *Position <= 0x7E))
+ return Position + 1;
+
+ // Check for valid UTF-8.
+ if (uint8_t(*Position) & 0x80) {
+ UTF8Decoded u8d = decodeUTF8(Position);
+ if ( u8d.second != 0
+ && u8d.first != 0xFEFF
+ && ( u8d.first == 0x85
+ || ( u8d.first >= 0xA0
+ && u8d.first <= 0xD7FF)
+ || ( u8d.first >= 0xE000
+ && u8d.first <= 0xFFFD)
+ || ( u8d.first >= 0x10000
+ && u8d.first <= 0x10FFFF)))
+ return Position + u8d.second;
+ }
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (*Position == 0x0D) {
+ if (Position + 1 != End && *(Position + 1) == 0x0A)
+ return Position + 2;
+ return Position + 1;
+ }
+
+ if (*Position == 0x0A)
+ return Position + 1;
+ return Position;
+}
+
+
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position + 1;
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position;
+ return skip_nb_char(Position);
+}
+
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position) {
+ while (true) {
+ StringRef::iterator i = (this->*Func)(Position);
+ if (i == Position)
+ break;
+ Position = i;
+ }
+ return Position;
+}
+
+static bool is_ns_hex_digit(const char C) {
+ return (C >= '0' && C <= '9')
+ || (C >= 'a' && C <= 'f')
+ || (C >= 'A' && C <= 'F');
+}
+
+static bool is_ns_word_char(const char C) {
+ return C == '-'
+ || (C >= 'a' && C <= 'z')
+ || (C >= 'A' && C <= 'Z');
+}
+
+StringRef Scanner::scan_ns_uri_char() {
+ StringRef::iterator Start = Current;
+ while (true) {
+ if (Current == End)
+ break;
+ if (( *Current == '%'
+ && Current + 2 < End
+ && is_ns_hex_digit(*(Current + 1))
+ && is_ns_hex_digit(*(Current + 2)))
+ || is_ns_word_char(*Current)
+ || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+ != StringRef::npos) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ }
+ return StringRef(Start, Current - Start);
+}
+
+StringRef Scanner::scan_ns_plain_one_line() {
+ StringRef::iterator start = Current;
+ // The first character must already be verified.
+ ++Current;
+ while (true) {
+ if (Current == End) {
+ break;
+ } else if (*Current == ':') {
+ // Check if the next character is a ns-char.
+ if (Current + 1 == End)
+ break;
+ StringRef::iterator i = skip_ns_char(Current + 1);
+ if (Current + 1 != i) {
+ Current = i;
+ Column += 2; // Consume both the ':' and ns-char.
+ } else
+ break;
+ } else if (*Current == '#') {
+ // Check if the previous character was a ns-char.
+ // The & 0x80 check is to check for the trailing byte of a UTF-8 multi-byte sequence.
+ if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ } else {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ return StringRef(start, Current - start);
+}
+
+bool Scanner::consume(uint32_t Expected) {
+ if (Expected >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (Current == End)
+ return false;
+ if (uint8_t(*Current) >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (uint8_t(*Current) == Expected) {
+ ++Current;
+ ++Column;
+ return true;
+ }
+ return false;
+}
+
+void Scanner::skip(uint32_t Distance) {
+ Current += Distance;
+ Column += Distance;
+}
+
+bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
+ if (Position == End)
+ return false;
+ if ( *Position == ' ' || *Position == '\t'
+ || *Position == '\r' || *Position == '\n')
+ return true;
+ return false;
+}
+
+void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired) {
+ if (IsSimpleKeyAllowed) {
+ SimpleKey SK;
+ SK.Tok = Tok;
+ SK.Line = Line;
+ SK.Column = AtColumn;
+ SK.IsRequired = IsRequired;
+ SK.FlowLevel = FlowLevel;
+ SimpleKeys.push_back(SK);
+ }
+}
+
+void Scanner::removeStaleSimpleKeyCandidates() {
+ for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
+ i != SimpleKeys.end();) {
+ if (i->Line != Line || i->Column + 1024 < Column) {
+ if (i->IsRequired)
+ setError( "Could not find expected : for simple key"
+ , i->Tok->Range.begin());
+ i = SimpleKeys.erase(i);
+ } else
+ ++i;
+ }
+}
+
+void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
+ if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
+ SimpleKeys.pop_back();
+}
+
+bool Scanner::unrollIndent(int ToColumn) {
+ Token T;
+ // Indentation is ignored in flow.
+ if (FlowLevel != 0)
+ return true;
+
+ while (Indent > ToColumn) {
+ T.Kind = Token::TK_BlockEnd;
+ T.Range = StringRef(Current, 1);
+ TokenQueue.push_back(T);
+ Indent = Indents.pop_back_val();
+ }
+
+ return true;
+}
+
+bool Scanner::rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint) {
+ if (FlowLevel)
+ return true;
+ if (Indent < ToColumn) {
+ Indents.push_back(Indent);
+ Indent = ToColumn;
+
+ Token T;
+ T.Kind = Kind;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.insert(InsertPoint, T);
+ }
+ return true;
+}
+
+void Scanner::scanToNextToken() {
+ while (true) {
+ while (*Current == ' ' || *Current == '\t') {
+ skip(1);
+ }
+
+ // Skip comment.
+ if (*Current == '#') {
+ while (true) {
+ // This may skip more than one byte, thus Column is only incremented
+ // for code points.
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+
+ // Skip EOL.
+ StringRef::iterator i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Line;
+ Column = 0;
+ // New lines may start a simple key.
+ if (!FlowLevel)
+ IsSimpleKeyAllowed = true;
+ }
+}
+
+bool Scanner::scanStreamStart() {
+ IsStartOfStream = false;
+
+ EncodingInfo EI = getUnicodeEncoding(currentInput());
+
+ Token T;
+ T.Kind = Token::TK_StreamStart;
+ T.Range = StringRef(Current, EI.second);
+ TokenQueue.push_back(T);
+ Current += EI.second;
+ return true;
+}
+
+bool Scanner::scanStreamEnd() {
+ // Force an ending new line if one isn't present.
+ if (Column != 0) {
+ Column = 0;
+ ++Line;
+ }
+
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = Token::TK_StreamEnd;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanDirective() {
+ // Reset the indentation level.
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ StringRef::iterator Start = Current;
+ consume('%');
+ StringRef::iterator NameStart = Current;
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ StringRef Name(NameStart, Current - NameStart);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+
+ if (Name == "YAML") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Token T;
+ T.Kind = Token::TK_VersionDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+ }
+ return false;
+}
+
+bool Scanner::scanDocumentIndicator(bool IsStart) {
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
+ T.Range = StringRef(Current, 3);
+ skip(3);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanFlowCollectionStart(bool IsSequence) {
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceStart
+ : Token::TK_FlowMappingStart;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+
+ // [ and { may begin a simple key.
+ saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+
+ // And may also be followed by a simple key.
+ IsSimpleKeyAllowed = true;
+ ++FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = false;
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
+ : Token::TK_FlowMappingEnd;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ if (FlowLevel)
+ --FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowEntry() {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_FlowEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanBlockEntry() {
+ rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_BlockEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanKey() {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = !FlowLevel;
+
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanValue() {
+ // If the previous token could have been a simple key, insert the key token
+ // into the token queue.
+ if (!SimpleKeys.empty()) {
+ SimpleKey SK = SimpleKeys.pop_back_val();
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = SK.Tok->Range;
+ TokenQueueT::iterator i, e;
+ for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
+ if (i == SK.Tok)
+ break;
+ }
+ assert(i != e && "SimpleKey not in token queue!");
+ i = TokenQueue.insert(i, T);
+
+ // We may also need to add a Block-Mapping-Start token.
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
+
+ IsSimpleKeyAllowed = false;
+ } else {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+ IsSimpleKeyAllowed = !FlowLevel;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Value;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
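+
+// Worked example: in  ...\"  (one '\' before the quote) the count of
+// consecutive backslashes is 1, which is odd, so the quote was escaped; in
+// ...\\"  (two '\'s) the count is 2, the first '\' escapes the second, and
+// the quote itself was not escaped.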
+
+bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ if (IsDoubleQuoted) {
+ do {
+ ++Current;
+ while (Current != End && *Current != '"')
+ ++Current;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
+ } else {
+ skip(1);
+ while (true) {
+ // Skip a ' followed by another '.
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
+ skip(2);
+ continue;
+ } else if (*Current == '\'')
+ break;
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ Column = 0;
+ ++Line;
+ } else {
+ if (i == End)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ }
+ skip(1); // Skip ending quote.
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanPlainScalar() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ unsigned LeadingBlanks = 0;
+ assert(Indent >= -1 && "Indent must be >= -1 !");
+ unsigned indent = static_cast<unsigned>(Indent + 1);
+ while (true) {
+ if (*Current == '#')
+ break;
+
+ while (!isBlankOrBreak(Current)) {
+ if ( FlowLevel && *Current == ':'
+ && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+ setError("Found unexpected ':' while scanning a plain scalar", Current);
+ return false;
+ }
+
+ // Check for the end of the plain scalar.
+ if ( (*Current == ':' && isBlankOrBreak(Current + 1))
+ || ( FlowLevel
+ && (StringRef(Current, 1).find_first_of(",:?[]{}")
+ != StringRef::npos)))
+ break;
+
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ // Are we at the end?
+ if (!isBlankOrBreak(Current))
+ break;
+
+ // Eat blanks.
+ StringRef::iterator Tmp = Current;
+ while (isBlankOrBreak(Tmp)) {
+ StringRef::iterator i = skip_s_white(Tmp);
+ if (i != Tmp) {
+ if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
+ setError("Found invalid tab character in indentation", Tmp);
+ return false;
+ }
+ Tmp = i;
+ ++Column;
+ } else {
+ i = skip_b_break(Tmp);
+ if (!LeadingBlanks)
+ LeadingBlanks = 1;
+ Tmp = i;
+ Column = 0;
+ ++Line;
+ }
+ }
+
+ if (!FlowLevel && Column < indent)
+ break;
+
+ Current = Tmp;
+ }
+ if (Start == Current) {
+ setError("Got empty plain scalar", Start);
+ return false;
+ }
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Plain scalars can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanAliasOrAnchor(bool IsAlias) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1);
+ while(true) {
+ if ( *Current == '[' || *Current == ']'
+ || *Current == '{' || *Current == '}'
+ || *Current == ','
+ || *Current == ':')
+ break;
+ StringRef::iterator i = skip_ns_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty alias or anchor", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Alias and anchors can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+ StringRef::iterator Start = Current;
+ skip(1); // Eat | or >
+ while(true) {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ if (Column == 0)
+ break;
+ i = skip_b_break(Current);
+ if (i != Current) {
+ // We got a line break.
+ Column = 0;
+ ++Line;
+ Current = i;
+ continue;
+ } else {
+ // There was an error, which should already have been printed out.
+ return false;
+ }
+ }
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty block scalar", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanTag() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1); // Eat !.
+ if (Current == End || isBlankOrBreak(Current)); // An empty tag.
+ else if (*Current == '<') {
+ skip(1);
+ scan_ns_uri_char();
+ if (!consume('>'))
+ return false;
+ } else {
+ // FIXME: Actually parse the c-ns-shorthand-tag rule.
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ }
+
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Tags can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::fetchMoreTokens() {
+ if (IsStartOfStream)
+ return scanStreamStart();
+
+ scanToNextToken();
+
+ if (Current == End)
+ return scanStreamEnd();
+
+ removeStaleSimpleKeyCandidates();
+
+ unrollIndent(Column);
+
+ if (Column == 0 && *Current == '%')
+ return scanDirective();
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '-'
+ && *(Current + 1) == '-'
+ && *(Current + 2) == '-'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(true);
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '.'
+ && *(Current + 1) == '.'
+ && *(Current + 2) == '.'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(false);
+
+ if (*Current == '[')
+ return scanFlowCollectionStart(true);
+
+ if (*Current == '{')
+ return scanFlowCollectionStart(false);
+
+ if (*Current == ']')
+ return scanFlowCollectionEnd(true);
+
+ if (*Current == '}')
+ return scanFlowCollectionEnd(false);
+
+ if (*Current == ',')
+ return scanFlowEntry();
+
+ if (*Current == '-' && isBlankOrBreak(Current + 1))
+ return scanBlockEntry();
+
+ if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanKey();
+
+ if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanValue();
+
+ if (*Current == '*')
+ return scanAliasOrAnchor(true);
+
+ if (*Current == '&')
+ return scanAliasOrAnchor(false);
+
+ if (*Current == '!')
+ return scanTag();
+
+ if (*Current == '|' && !FlowLevel)
+ return scanBlockScalar(true);
+
+ if (*Current == '>' && !FlowLevel)
+ return scanBlockScalar(false);
+
+ if (*Current == '\'')
+ return scanFlowScalar(false);
+
+ if (*Current == '"')
+ return scanFlowScalar(true);
+
+ // Get a plain scalar.
+ StringRef FirstChar(Current, 1);
+ if (!(isBlankOrBreak(Current)
+ || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
+ || (*Current == '-' && !isBlankOrBreak(Current + 1))
+ || (!FlowLevel && (*Current == '?' || *Current == ':')
+ && isBlankOrBreak(Current + 1))
+ || (!FlowLevel && *Current == ':'
+ && Current + 2 < End
+ && *(Current + 1) == ':'
+ && !isBlankOrBreak(Current + 2)))
+ return scanPlainScalar();
+
+ setError("Unrecognized character while tokenizing.");
+ return false;
+}
+
+Stream::Stream(StringRef Input, SourceMgr &SM)
+ : scanner(new Scanner(Input, SM))
+ , CurrentDoc(0) {}
+
+Stream::~Stream() {}
+
+bool Stream::failed() { return scanner->failed(); }
+
+void Stream::printError(Node *N, const Twine &Msg) {
+ SmallVector<SMRange, 1> Ranges;
+ Ranges.push_back(N->getSourceRange());
+ scanner->printError( N->getSourceRange().Start
+ , SourceMgr::DK_Error
+ , Msg
+ , Ranges);
+}
+
+void Stream::handleYAMLDirective(const Token &t) {
+ // TODO: Ensure version is 1.x.
+}
+
+document_iterator Stream::begin() {
+ if (CurrentDoc)
+ report_fatal_error("Can only iterate over the stream once");
+
+ // Skip Stream-Start.
+ scanner->getNext();
+
+ CurrentDoc.reset(new Document(*this));
+ return document_iterator(CurrentDoc);
+}
+
+document_iterator Stream::end() {
+ return document_iterator();
+}
+
+void Stream::skip() {
+ for (document_iterator i = begin(), e = end(); i != e; ++i)
+ i->skip();
+}
+
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+ : Doc(D)
+ , TypeID(Type)
+ , Anchor(A) {
+ SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
+ SourceRange = SMRange(Start, Start);
+}
+
+Token &Node::peekNext() {
+ return Doc->peekNext();
+}
+
+Token Node::getNext() {
+ return Doc->getNext();
+}
+
+Node *Node::parseBlockNode() {
+ return Doc->parseBlockNode();
+}
+
+BumpPtrAllocator &Node::getAllocator() {
+ return Doc->NodeAllocator;
+}
+
+void Node::setError(const Twine &Msg, Token &Tok) const {
+ Doc->setError(Msg, Tok);
+}
+
+bool Node::failed() const {
+ return Doc->failed();
+}
+
+
+
+StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
+ // TODO: Handle newlines properly. We need to remove leading whitespace.
+ if (Value[0] == '"') { // Double quoted.
+ // Pull off the leading and trailing "s.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ // Search for characters that would require unescaping the value.
+ StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
+ if (i != StringRef::npos)
+ return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+ return UnquotedValue;
+ } else if (Value[0] == '\'') { // Single quoted.
+ // Pull off the leading and trailing 's.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ StringRef::size_type i = UnquotedValue.find('\'');
+ if (i != StringRef::npos) {
+ // We're going to need Storage.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ Storage.push_back('\'');
+ UnquotedValue = UnquotedValue.substr(i + 2);
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+ }
+ return UnquotedValue;
+ }
+ // Plain or block.
+ size_t trimtrail = Value.rfind(' ');
+ return Value.drop_back(
+ trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
+}
+
+StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
+ , StringRef::size_type i
+ , SmallVectorImpl<char> &Storage)
+ const {
+ // Use Storage to build proper value.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
+ // Insert all previous chars into Storage.
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ // Chop off inserted chars.
+ UnquotedValue = UnquotedValue.substr(i);
+
+ assert(!UnquotedValue.empty() && "Can't be empty!");
+
+ // Parse escape or line break.
+ switch (UnquotedValue[0]) {
+ case '\r':
+ case '\n':
+ Storage.push_back('\n');
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ UnquotedValue = UnquotedValue.substr(1);
+ break;
+ default:
+ if (UnquotedValue.size() == 1)
+ // TODO: Report error.
+ break;
+ UnquotedValue = UnquotedValue.substr(1);
+ switch (UnquotedValue[0]) {
+ default: {
+ Token T;
+ T.Range = StringRef(UnquotedValue.begin(), 1);
+ setError("Unrecognized escape code!", T);
+ return "";
+ }
+ case '\r':
+ case '\n':
+ // Remove the new line.
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ // If this was just a single byte newline, it will get skipped
+ // below.
+ break;
+ case '0':
+ Storage.push_back(0x00);
+ break;
+ case 'a':
+ Storage.push_back(0x07);
+ break;
+ case 'b':
+ Storage.push_back(0x08);
+ break;
+ case 't':
+ case 0x09:
+ Storage.push_back(0x09);
+ break;
+ case 'n':
+ Storage.push_back(0x0A);
+ break;
+ case 'v':
+ Storage.push_back(0x0B);
+ break;
+ case 'f':
+ Storage.push_back(0x0C);
+ break;
+ case 'r':
+ Storage.push_back(0x0D);
+ break;
+ case 'e':
+ Storage.push_back(0x1B);
+ break;
+ case ' ':
+ Storage.push_back(0x20);
+ break;
+ case '"':
+ Storage.push_back(0x22);
+ break;
+ case '/':
+ Storage.push_back(0x2F);
+ break;
+ case '\\':
+ Storage.push_back(0x5C);
+ break;
+ case 'N':
+ encodeUTF8(0x85, Storage);
+ break;
+ case '_':
+ encodeUTF8(0xA0, Storage);
+ break;
+ case 'L':
+ encodeUTF8(0x2028, Storage);
+ break;
+ case 'P':
+ encodeUTF8(0x2029, Storage);
+ break;
+ case 'x': {
+ if (UnquotedValue.size() < 3)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(2);
+ break;
+ }
+ case 'u': {
+ if (UnquotedValue.size() < 5)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(4);
+ break;
+ }
+ case 'U': {
+ if (UnquotedValue.size() < 9)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(8);
+ break;
+ }
+ }
+ UnquotedValue = UnquotedValue.substr(1);
+ }
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+}
+
+Node *KeyValueNode::getKey() {
+ if (Key)
+ return Key;
+ // Handle implicit null keys.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_Value
+ || t.Kind == Token::TK_Error) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+ if (t.Kind == Token::TK_Key)
+ getNext(); // skip TK_Key.
+ }
+
+ // Handle explicit null keys.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We've got a normal key.
+ return Key = parseBlockNode();
+}
+
+Node *KeyValueNode::getValue() {
+ if (Value)
+ return Value;
+ getKey()->skip();
+ if (failed())
+ return Value = new (getAllocator()) NullNode(Doc);
+
+ // Handle implicit null values.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_FlowMappingEnd
+ || t.Kind == Token::TK_Key
+ || t.Kind == Token::TK_FlowEntry
+ || t.Kind == Token::TK_Error) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ if (t.Kind != Token::TK_Value) {
+ setError("Unexpected token in Key Value.", t);
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+ getNext(); // skip TK_Value.
+ }
+
+ // Handle explicit null values.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We got a normal value.
+ return Value = parseBlockNode();
+}
+
+void MappingNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry) {
+ CurrentEntry->skip();
+ if (Type == MT_Inline) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ }
+ Token T = peekNext();
+ if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
+ // KeyValueNode eats the TK_Key. That way it can detect null keys.
+ CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
+ } else if (Type == MT_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError("Unexpected token. Expected Key or Block End", T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ return increment();
+ case Token::TK_FlowMappingEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
+ "Mapping End."
+ , T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ }
+}
+
+void SequenceNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry)
+ CurrentEntry->skip();
+ Token T = peekNext();
+ if (SeqType == ST_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Block Entry or Block End."
+ , T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Indentless) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ default:
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Flow) {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ WasPreviousTokenFlowEntry = true;
+ return increment();
+ case Token::TK_FlowSequenceEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ case Token::TK_StreamEnd:
+ case Token::TK_DocumentEnd:
+ case Token::TK_DocumentStart:
+ setError("Could not find closing ]!", T);
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ if (!WasPreviousTokenFlowEntry) {
+ setError("Expected , between entries!", T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ }
+ // Otherwise it must be a flow entry.
+ CurrentEntry = parseBlockNode();
+ if (!CurrentEntry) {
+ IsAtEnd = true;
+ }
+ WasPreviousTokenFlowEntry = false;
+ break;
+ }
+ }
+}
+
+Document::Document(Stream &S) : stream(S), Root(0) {
+ if (parseDirectives())
+ expectToken(Token::TK_DocumentStart);
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_DocumentStart)
+ getNext();
+}
+
+bool Document::skip() {
+ if (stream.scanner->failed())
+ return false;
+ if (!Root)
+ getRoot();
+ Root->skip();
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ return false;
+ if (T.Kind == Token::TK_DocumentEnd) {
+ getNext();
+ return skip();
+ }
+ return true;
+}
+
+Token &Document::peekNext() {
+ return stream.scanner->peekNext();
+}
+
+Token Document::getNext() {
+ return stream.scanner->getNext();
+}
+
+void Document::setError(const Twine &Message, Token &Location) const {
+ stream.scanner->setError(Message, Location.Range.begin());
+}
+
+bool Document::failed() const {
+ return stream.scanner->failed();
+}
+
+Node *Document::parseBlockNode() {
+ Token T = peekNext();
+ // Handle properties.
+ Token AnchorInfo;
+parse_property:
+ switch (T.Kind) {
+ case Token::TK_Alias:
+ getNext();
+ return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
+ case Token::TK_Anchor:
+ if (AnchorInfo.Kind == Token::TK_Anchor) {
+ setError("Already encountered an anchor for this node!", T);
+ return 0;
+ }
+ AnchorInfo = getNext(); // Consume TK_Anchor.
+ T = peekNext();
+ goto parse_property;
+ case Token::TK_Tag:
+ getNext(); // Skip TK_Tag.
+ T = peekNext();
+ goto parse_property;
+ default:
+ break;
+ }
+
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ // We got an unindented BlockEntry sequence. This is not terminated with
+ // a BlockEnd.
+ // Don't eat the TK_BlockEntry, SequenceNode needs it.
+ return new (NodeAllocator) SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Indentless);
+ case Token::TK_BlockSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Block);
+ case Token::TK_BlockMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Block);
+ case Token::TK_FlowSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Flow);
+ case Token::TK_FlowMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Flow);
+ case Token::TK_Scalar:
+ getNext();
+ return new (NodeAllocator)
+ ScalarNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , T.Range);
+ case Token::TK_Key:
+ // Don't eat the TK_Key, KeyValueNode expects it.
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Inline);
+ case Token::TK_DocumentStart:
+ case Token::TK_DocumentEnd:
+ case Token::TK_StreamEnd:
+ default:
+ // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
+ // !!null null.
+ return new (NodeAllocator) NullNode(stream.CurrentDoc);
+ case Token::TK_Error:
+ return 0;
+ }
+ llvm_unreachable("Control flow shouldn't reach here.");
+ return 0;
+}
+
+bool Document::parseDirectives() {
+ bool isDirective = false;
+ while (true) {
+ Token T = peekNext();
+ if (T.Kind == Token::TK_TagDirective) {
+ handleTagDirective(getNext());
+ isDirective = true;
+ } else if (T.Kind == Token::TK_VersionDirective) {
+ stream.handleYAMLDirective(getNext());
+ isDirective = true;
+ } else
+ break;
+ }
+ return isDirective;
+}
+
+bool Document::expectToken(int TK) {
+ Token T = getNext();
+ if (T.Kind != TK) {
+ setError("Unexpected token", T);
+ return false;
+ }
+ return true;
+}
+
+OwningPtr<Document> document_iterator::NullDoc;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 4927e9a7b9d4..72d3986f41dd 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -20,6 +20,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/system_error.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 0db41346911d..82f72b03eefd 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -6,11 +6,8 @@ add_llvm_library(LLVMTableGen
Error.cpp
Main.cpp
Record.cpp
+ TableGenAction.cpp
TableGenBackend.cpp
TGLexer.cpp
TGParser.cpp
)
-
-add_llvm_library_dependencies(LLVMTableGen
- LLVMSupport
- )
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 5b2cbbfec4b5..5071ee77ac43 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -21,11 +21,11 @@ namespace llvm {
SourceMgr SrcMgr;
void PrintError(SMLoc ErrorLoc, const Twine &Msg) {
- SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
+ SrcMgr.PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
void PrintError(const char *Loc, const Twine &Msg) {
- SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
void PrintError(const Twine &Msg) {
diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt
new file mode 100644
index 000000000000..54cedfd5918b
--- /dev/null
+++ b/lib/TableGen/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/TableGen/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TableGen
+parent = Libraries
+required_libraries = Support
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index b7c51cae953c..93eed24b8dc7 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/Format.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -29,6 +30,8 @@ using namespace llvm;
// std::string wrapper for DenseMap purposes
//===----------------------------------------------------------------------===//
+namespace llvm {
+
/// TableGenStringKey - This is a wrapper for std::string suitable for
/// using as a key to a DenseMap. Because there isn't a particularly
/// good way to indicate tombstone or empty keys for strings, we want
@@ -43,14 +46,16 @@ public:
TableGenStringKey(const char *str) : data(str) {}
const std::string &str() const { return data; }
-
+
+ friend hash_code hash_value(const TableGenStringKey &Value) {
+ using llvm::hash_value;
+ return hash_value(Value.str());
+ }
private:
std::string data;
};
/// Specialize DenseMapInfo for TableGenStringKey.
-namespace llvm {
-
template<> struct DenseMapInfo<TableGenStringKey> {
static inline TableGenStringKey getEmptyKey() {
TableGenStringKey Empty("<<<EMPTY KEY>>>");
@@ -61,7 +66,8 @@ template<> struct DenseMapInfo<TableGenStringKey> {
return Tombstone;
}
static unsigned getHashValue(const TableGenStringKey& Val) {
- return HashString(Val.str());
+ using llvm::hash_value;
+ return hash_value(Val);
}
static bool isEqual(const TableGenStringKey& LHS,
const TableGenStringKey& RHS) {
@@ -69,7 +75,7 @@ template<> struct DenseMapInfo<TableGenStringKey> {
}
};
-}
+} // namespace llvm
//===----------------------------------------------------------------------===//
// Type implementations
@@ -78,9 +84,9 @@ template<> struct DenseMapInfo<TableGenStringKey> {
BitRecTy BitRecTy::Shared;
IntRecTy IntRecTy::Shared;
StringRecTy StringRecTy::Shared;
-CodeRecTy CodeRecTy::Shared;
DagRecTy DagRecTy::Shared;
+void RecTy::anchor() { }
void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
@@ -315,12 +321,6 @@ Init *ListRecTy::convertValue(TypedInit *TI) {
return 0;
}
-Init *CodeRecTy::convertValue(TypedInit *TI) {
- if (TI->getType()->typeIsConvertibleTo(this))
- return TI;
- return 0;
-}
-
Init *DagRecTy::convertValue(TypedInit *TI) {
if (TI->getType()->typeIsConvertibleTo(this))
return TI;
@@ -444,13 +444,18 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
// Initializer implementations
//===----------------------------------------------------------------------===//
+void Init::anchor() { }
void Init::dump() const { return print(errs()); }
+void UnsetInit::anchor() { }
+
UnsetInit *UnsetInit::get() {
static UnsetInit TheInit;
return &TheInit;
}
+void BitInit::anchor() { }
+
BitInit *BitInit::get(bool V) {
static BitInit True(true);
static BitInit False(false);
@@ -565,7 +570,9 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
return BitsInit::get(NewBits);
}
-StringInit *StringInit::get(const std::string &V) {
+void StringInit::anchor() { }
+
+StringInit *StringInit::get(StringRef V) {
typedef StringMap<StringInit *> Pool;
static Pool ThePool;
@@ -574,15 +581,6 @@ StringInit *StringInit::get(const std::string &V) {
return I;
}
-CodeInit *CodeInit::get(const std::string &V) {
- typedef StringMap<CodeInit *> Pool;
- static Pool ThePool;
-
- CodeInit *&I = ThePool[V];
- if (!I) I = new CodeInit(V);
- return I;
-}
-
static void ProfileListInit(FoldingSetNodeID &ID,
ArrayRef<Init *> Range,
RecTy *EltTy) {
@@ -735,7 +733,6 @@ UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown unop");
case CAST: {
if (getType()->getAsString() == "string") {
StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
@@ -747,6 +744,11 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (LHSd) {
return StringInit::get(LHSd->getDef()->getName());
}
+
+ IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ if (LHSi) {
+ return StringInit::get(LHSi->getAsString());
+ }
} else {
StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
if (LHSs) {
@@ -760,7 +762,9 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
return VarInit::get(Name, RV->getType());
}
- std::string TemplateArgName = CurRec->getName()+":"+Name;
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name,
+ ":");
+
if (CurRec->isTemplateArg(TemplateArgName)) {
const RecordVal *RV = CurRec->getValue(TemplateArgName);
assert(RV && "Template arg doesn't exist??");
@@ -773,7 +777,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
if (CurMultiClass) {
- std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
+
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
assert(RV && "Template arg doesn't exist??");
@@ -885,7 +890,6 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown binop");
case CONCAT: {
DagInit *LHSs = dynamic_cast<DagInit*>(LHS);
DagInit *RHSs = dynamic_cast<DagInit*>(RHS);
@@ -944,7 +948,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
switch (getOpcode()) {
- default: assert(0 && "Bad opcode!");
+ default: llvm_unreachable("Bad opcode!");
case SHL: Result = LHSv << RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -1134,7 +1138,6 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown binop");
case SUBST: {
DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
VarInit *LHSv = dynamic_cast<VarInit*>(LHS);
@@ -1298,7 +1301,12 @@ TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
VarInit *VarInit::get(const std::string &VN, RecTy *T) {
- typedef std::pair<RecTy *, TableGenStringKey> Key;
+ Init *Value = StringInit::get(VN);
+ return VarInit::get(Value, T);
+}
+
+VarInit *VarInit::get(Init *VN, RecTy *T) {
+ typedef std::pair<RecTy *, Init *> Key;
typedef DenseMap<Key, VarInit *> Pool;
static Pool ThePool;
@@ -1309,12 +1317,19 @@ VarInit *VarInit::get(const std::string &VN, RecTy *T) {
return I;
}
+const std::string &VarInit::getName() const {
+ StringInit *NameString =
+ dynamic_cast<StringInit *>(getNameInit());
+ assert(NameString && "VarInit name is not a string!");
+ return NameString->getValue();
+}
+
Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV,
unsigned Bit) const {
- if (R.isTemplateArg(getName())) return 0;
- if (IRV && IRV->getName() != getName()) return 0;
+ if (R.isTemplateArg(getNameInit())) return 0;
+ if (IRV && IRV->getNameInit() != getNameInit()) return 0;
- RecordVal *RV = R.getValue(getName());
+ RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
assert(dynamic_cast<BitsInit*>(RV->getValue()));
BitsInit *BI = (BitsInit*)RV->getValue();
@@ -1333,10 +1348,10 @@ Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV,
Init *VarInit::resolveListElementReference(Record &R,
const RecordVal *IRV,
unsigned Elt) const {
- if (R.isTemplateArg(getName())) return 0;
- if (IRV && IRV->getName() != getName()) return 0;
+ if (R.isTemplateArg(getNameInit())) return 0;
+ if (IRV && IRV->getNameInit() != getNameInit()) return 0;
- RecordVal *RV = R.getValue(getName());
+ RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
ListInit *LI = dynamic_cast<ListInit*>(RV->getValue());
if (!LI) {
@@ -1659,7 +1674,7 @@ void RecordVal::dump() const { errs() << *this; }
void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (getPrefix()) OS << "field ";
- OS << *getType() << " " << getName();
+ OS << *getType() << " " << getNameInitAsString();
if (getValue())
OS << " = " << *getValue();
@@ -1669,13 +1684,22 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
unsigned Record::LastID = 0;
+void Record::init() {
+ checkName();
+
+ // Every record potentially has a def at the top. This value is
+ // replaced with the top-level def name at instantiation time.
+ RecordVal DN("NAME", StringRecTy::get(), 0);
+ addValue(DN);
+}
+
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name);
assert(TypedName && "Record name is not typed!");
RecTy *Type = TypedName->getType();
if (dynamic_cast<StringRecTy *>(Type) == 0) {
- llvm_unreachable("Record name is not a string!");
+ throw "Record name is not a string!";
}
}
@@ -1695,20 +1719,13 @@ const std::string &Record::getName() const {
void Record::setName(Init *NewName) {
if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) {
TrackedRecords.removeDef(Name->getAsUnquotedString());
- Name = NewName;
TrackedRecords.addDef(this);
- } else {
+ } else if (TrackedRecords.getClass(Name->getAsUnquotedString()) == this) {
TrackedRecords.removeClass(Name->getAsUnquotedString());
- Name = NewName;
TrackedRecords.addClass(this);
- }
+ } // Otherwise this isn't yet registered.
+ Name = NewName;
checkName();
- // Since the Init for the name was changed, see if we can resolve
- // any of it using members of the Record.
- Init *ComputedName = Name->resolveReferences(*this, 0);
- if (ComputedName != Name) {
- setName(ComputedName);
- }
// DO NOT resolve record values to the name at this point because
// there might be default values for arguments of this def. Those
// arguments might not have been resolved yet so we don't want to
@@ -1731,17 +1748,25 @@ void Record::setName(const std::string &Name) {
/// references.
void Record::resolveReferencesTo(const RecordVal *RV) {
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (RV == &Values[i]) // Skip resolving the same field as the given one
+ continue;
if (Init *V = Values[i].getValue())
Values[i].setValue(V->resolveReferences(*this, RV));
}
+ Init *OldName = getNameInit();
+ Init *NewName = Name->resolveReferences(*this, RV);
+ if (NewName != OldName) {
+ // Re-register with RecordKeeper.
+ setName(NewName);
+ }
}
void Record::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
- OS << R.getName();
+ OS << R.getNameInitAsString();
- const std::vector<std::string> &TArgs = R.getTemplateArgs();
+ const std::vector<Init *> &TArgs = R.getTemplateArgs();
if (!TArgs.empty()) {
OS << "<";
for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
@@ -1758,7 +1783,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
if (!SC.empty()) {
OS << "\t//";
for (unsigned i = 0, e = SC.size(); i != e; ++i)
- OS << " " << SC[i]->getName();
+ OS << " " << SC[i]->getNameInitAsString();
}
OS << "\n";
@@ -1954,18 +1979,6 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
"' does not have a dag initializer!";
}
-std::string Record::getValueAsCode(StringRef FieldName) const {
- const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
- throw "Record `" + getName() + "' does not have a field named `" +
- FieldName.str() + "'!\n";
-
- if (CodeInit *CI = dynamic_cast<CodeInit*>(R->getValue()))
- return CI->getValue();
- throw "Record `" + getName() + "', field `" + FieldName.str() +
- "' does not have a code initializer!";
-}
-
void MultiClass::dump() const {
errs() << "Record:\n";
@@ -2017,3 +2030,39 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
return Defs;
}
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ Init *Name, const std::string &Scoper) {
+ RecTy *Type = dynamic_cast<TypedInit *>(Name)->getType();
+
+ BinOpInit *NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurRec.getNameInit(),
+ StringInit::get(Scoper),
+ Type)->Fold(&CurRec, CurMultiClass),
+ Name,
+ Type);
+
+ if (CurMultiClass && Scoper != "::") {
+ NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurMultiClass->Rec.getNameInit(),
+ StringInit::get("::"),
+ Type)->Fold(&CurRec, CurMultiClass),
+ NewName->Fold(&CurRec, CurMultiClass),
+ Type);
+ }
+
+ return NewName->Fold(&CurRec, CurMultiClass);
+}
+
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ const std::string &Name,
+ const std::string &Scoper) {
+ return QualifyName(CurRec, CurMultiClass, StringInit::get(Name), Scoper);
+}
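With the IntInit case added to UnOpInit::Fold above, a !cast to string of an integer value now folds once the operand resolves to an actual IntInit. A minimal TableGen sketch of that behavior (the Tagged class and its field names are hypothetical, not taken from this patch):

    // Hypothetical .td sketch: once the template argument resolves to an
    // IntInit, the cast folds to the decimal string form of the value.
    class Tagged<int n> {
      string Tag = !cast<string>(n);   // Tagged<42> yields Tag = "42"
    }
    def T42 : Tagged<42>;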
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 8c1b4290548d..ff322e74fba2 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -15,7 +15,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Config/config.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -23,6 +22,9 @@
#include <cstdlib>
#include <cstring>
#include <cerrno>
+
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+
using namespace llvm;
TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -80,6 +82,10 @@ int TGLexer::getNextChar() {
}
}
+int TGLexer::peekNextChar(int Index) {
+ return *(CurPtr + Index);
+}
+
tgtok::TokKind TGLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
@@ -87,10 +93,10 @@ tgtok::TokKind TGLexer::LexToken() {
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_#]
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
return LexIdentifier();
-
+
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
case EOF: return tgtok::Eof;
@@ -107,6 +113,7 @@ tgtok::TokKind TGLexer::LexToken() {
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
+ case '#': return tgtok::paste;
case 0:
case ' ':
@@ -128,8 +135,44 @@ tgtok::TokKind TGLexer::LexToken() {
return LexToken();
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9':
+ case '7': case '8': case '9': {
+ int NextChar = 0;
+ if (isdigit(CurChar)) {
+ // Allow identifiers to start with a number if the digits are followed
+ // by identifier characters. This can happen with paste operations like
+ // foo#8i.
+ int i = 0;
+ do {
+ NextChar = peekNextChar(i++);
+ } while (isdigit(NextChar));
+
+ if (NextChar == 'x' || NextChar == 'b') {
+ // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f], this is most
+ // likely a number.
+ int NextNextChar = peekNextChar(i);
+ switch (NextNextChar) {
+ default:
+ break;
+ case '0': case '1':
+ if (NextChar == 'b')
+ return LexNumber();
+ // Fallthrough
+ case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if (NextChar == 'x')
+ return LexNumber();
+ break;
+ }
+ }
+ }
+
+ if (isalpha(NextChar) || NextChar == '_')
+ return LexIdentifier();
+
return LexNumber();
+ }
case '"': return LexString();
case '$': return LexVarName();
case '[': return LexBracket();
@@ -210,8 +253,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
const char *IdentStart = TokStart;
// Match the rest of the identifier regex: [0-9a-zA-Z_#]*
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
- *CurPtr == '#')
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
// Check to see if this identifier is a keyword.
@@ -232,6 +274,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("dag", tgtok::Dag)
.Case("class", tgtok::Class)
.Case("def", tgtok::Def)
+ .Case("foreach", tgtok::Foreach)
.Case("defm", tgtok::Defm)
.Case("multiclass", tgtok::MultiClass)
.Case("field", tgtok::Field)
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 84d328b12d97..8a850b5cec8e 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -39,9 +39,10 @@ namespace tgtok {
colon, semi, // : ;
comma, period, // , .
equal, question, // = ?
-
+ paste, // #
+
// Keywords.
- Bit, Bits, Class, Code, Dag, Def, Defm, Field, In, Int, Let, List,
+ Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
MultiClass, String,
// !keywords.
@@ -109,6 +110,7 @@ private:
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
int getNextChar();
+ int peekNextChar(int Index);
void SkipBCPLComment();
bool SkipCComment();
tgtok::TokKind LexIdentifier();
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index e7f00baf4931..04c4fc158ff7 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -64,7 +64,7 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
if (CurRec == 0)
CurRec = &CurMultiClass->Rec;
- if (RecordVal *ERV = CurRec->getValue(RV.getName())) {
+ if (RecordVal *ERV = CurRec->getValue(RV.getNameInit())) {
// The value already exists in the class, treat this as a set.
if (ERV->setValue(RV.getValue()))
return Error(Loc, "New definition of '" + RV.getName() + "' of type '" +
@@ -79,7 +79,7 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
-bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
+bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V) {
if (!V) return false;
@@ -87,13 +87,14 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
RecordVal *RV = CurRec->getValue(ValName);
if (RV == 0)
- return Error(Loc, "Value '" + ValName + "' unknown!");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' unknown!");
// Do not allow assignments like 'X = X'. This will just cause infinite loops
// in the resolution machinery.
if (BitList.empty())
if (VarInit *VI = dynamic_cast<VarInit*>(V))
- if (VI->getName() == ValName)
+ if (VI->getNameInit() == ValName)
return false;
// If we are assigning to a subset of the bits in the value... then we must be
@@ -103,7 +104,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
if (!BitList.empty()) {
BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue());
if (CurVal == 0)
- return Error(Loc, "Value '" + ValName + "' is not a bits type");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' is not a bits type");
// Convert the incoming value to a bits type of the appropriate size...
Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
@@ -123,7 +125,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
unsigned Bit = BitList[i];
if (NewBits[Bit])
return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" +
- ValName + "' more than once");
+ ValName->getAsUnquotedString() + "' more than once");
NewBits[Bit] = BInit->getBit(i);
}
@@ -135,9 +137,10 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
}
if (RV->setValue(V))
- return Error(Loc, "Value '" + ValName + "' of type '" +
- RV->getType()->getAsString() +
- "' is incompatible with initializer '" + V->getAsString() +"'");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' of type '"
+ + RV->getType()->getAsString() +
+ "' is incompatible with initializer '" + V->getAsString()
+ + "'");
return false;
}
@@ -151,7 +154,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (AddValue(CurRec, SubClass.RefLoc, Vals[i]))
return true;
- const std::vector<std::string> &TArgs = SC->getTemplateArgs();
+ const std::vector<Init *> &TArgs = SC->getTemplateArgs();
// Ensure that an appropriate number of template arguments are specified.
if (TArgs.size() < SubClass.TemplateArgs.size())
@@ -174,8 +177,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClass.RefLoc,"Value not specified for template argument #"
- + utostr(i) + " (" + TArgs[i] + ") of subclass '" +
- SC->getName() + "'!");
+ + utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SC->getNameInitAsString() + "'!");
}
}
@@ -230,7 +233,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
CurMC->DefPrototypes.push_back(NewDef);
}
- const std::vector<std::string> &SMCTArgs = SMC->Rec.getTemplateArgs();
+ const std::vector<Init *> &SMCTArgs = SMC->Rec.getTemplateArgs();
// Ensure that an appropriate number of template arguments are
// specified.
@@ -278,14 +281,121 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
} else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
return Error(SubMultiClass.RefLoc,
"Value not specified for template argument #"
- + utostr(i) + " (" + SMCTArgs[i] + ") of subclass '" +
- SMC->Rec.getName() + "'!");
+ + utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
}
}
return false;
}
+/// ProcessForeachDefs - Given a record, apply all of the variable
+/// values in all surrounding foreach loops, creating new records for
+/// each combination of values.
+bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc) {
+ // We want to instantiate a new copy of CurRec for each combination
+ // of nested loop iterator values. We don't want to instantiate
+ // any copies until we have values for each loop iterator.
+ IterSet IterVals;
+ for (LoopVector::iterator Loop = Loops.begin(), LoopEnd = Loops.end();
+ Loop != LoopEnd;
+ ++Loop) {
+ // Process this loop.
+ if (ProcessForeachDefs(CurRec, CurMultiClass, Loc,
+ IterVals, *Loop, Loop+1)) {
+ Error(Loc,
+ "Could not process loops for def " + CurRec->getNameInitAsString());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessForeachDefs - Given a record, a loop and a loop iterator,
+/// apply each of the variable values in this loop and then process
+/// subloops.
+bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc, IterSet &IterVals,
+ ForeachLoop &CurLoop,
+ LoopVector::iterator NextLoop) {
+ Init *IterVar = CurLoop.IterVar;
+ ListInit *List = dynamic_cast<ListInit *>(CurLoop.ListValue);
+
+ if (List == 0) {
+ Error(Loc, "Loop list is not a list");
+ return true;
+ }
+
+ // Process each value.
+ for (int64_t i = 0; i < List->getSize(); ++i) {
+ Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i);
+ IterVals.push_back(IterRecord(IterVar, ItemVal));
+
+ if (IterVals.size() == Loops.size()) {
+ // Ok, we have all of the iterator values for this point in the
+ // iteration space. Instantiate a new record to reflect this
+ // combination of values.
+ Record *IterRec = new Record(*CurRec);
+
+ // Set the iterator values now.
+ for (IterSet::iterator i = IterVals.begin(), iend = IterVals.end();
+ i != iend;
+ ++i) {
+ VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ if (IterVar == 0) {
+ Error(Loc, "foreach iterator is unresolved");
+ return true;
+ }
+
+ TypedInit *IVal = dynamic_cast<TypedInit *>(i->IterValue);
+ if (IVal == 0) {
+ Error(Loc, "foreach iterator value is untyped");
+ return true;
+ }
+
+ IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
+
+ if (SetValue(IterRec, Loc, IterVar->getName(),
+ std::vector<unsigned>(), IVal)) {
+ Error(Loc, "when instantiating this def");
+ return true;
+ }
+
+ // Resolve it next.
+ IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName()));
+
+ // Remove it.
+ IterRec->removeValue(IterVar->getName());
+ }
+
+ if (Records.getDef(IterRec->getNameInitAsString())) {
+ Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
+ return true;
+ }
+
+ Records.addDef(IterRec);
+ IterRec->resolveReferences();
+ }
+
+ if (NextLoop != Loops.end()) {
+ // Process nested loops.
+ if (ProcessForeachDefs(CurRec, CurMultiClass, Loc, IterVals, *NextLoop,
+ NextLoop+1)) {
+ Error(Loc,
+ "Could not process loops for def " +
+ CurRec->getNameInitAsString());
+ return true;
+ }
+ }
+
+ // We're done with this iterator.
+ IterVals.pop_back();
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Parser Code
//===----------------------------------------------------------------------===//
@@ -293,7 +403,8 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
/// isObjectStart - Return true if this is a valid first token for an Object.
static bool isObjectStart(tgtok::TokKind K) {
return K == tgtok::Class || K == tgtok::Def ||
- K == tgtok::Defm || K == tgtok::Let || K == tgtok::MultiClass;
+ K == tgtok::Defm || K == tgtok::Let ||
+ K == tgtok::MultiClass || K == tgtok::Foreach;
}
static std::string GetNewAnonymousName() {
@@ -303,18 +414,39 @@ static std::string GetNewAnonymousName() {
/// ParseObjectName - If an object name is specified, return it. Otherwise,
/// return an anonymous name.
-/// ObjectName ::= ID
+/// ObjectName ::= Value [ '#' Value ]*
/// ObjectName ::= /*empty*/
///
-std::string TGParser::ParseObjectName() {
- if (Lex.getCode() != tgtok::Id)
- return GetNewAnonymousName();
+Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+ return StringInit::get(GetNewAnonymousName());
+ default:
+ break;
+ }
- std::string Ret = Lex.getCurStrVal();
- Lex.Lex();
- return Ret;
-}
+ Record *CurRec = 0;
+ if (CurMultiClass)
+ CurRec = &CurMultiClass->Rec;
+
+ RecTy *Type = 0;
+ if (CurRec) {
+ const TypedInit *CurRecName =
+ dynamic_cast<const TypedInit *>(CurRec->getNameInit());
+ if (!CurRecName) {
+ TokError("Record name is not typed!");
+ return 0;
+ }
+ Type = CurRecName->getType();
+ }
+ return ParseValue(CurRec, Type, ParseNameMode);
+}
/// ParseClassID - Parse and resolve a reference to a class name. This returns
/// null on error.
@@ -570,11 +702,11 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
/// ParseType - Parse and return a tblgen type. This returns null on error.
///
/// Type ::= STRING // string type
+/// Type ::= CODE // code type
/// Type ::= BIT // bit type
/// Type ::= BITS '<' INTVAL '>' // bits<x> type
/// Type ::= INT // int type
/// Type ::= LIST '<' Type '>' // list<x> type
-/// Type ::= CODE // code type
/// Type ::= DAG // dag type
/// Type ::= ClassID // Record Type
///
@@ -582,9 +714,9 @@ RecTy *TGParser::ParseType() {
switch (Lex.getCode()) {
default: TokError("Unknown token when expecting a type"); return 0;
case tgtok::String: Lex.Lex(); return StringRecTy::get();
+ case tgtok::Code: Lex.Lex(); return StringRecTy::get();
case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
case tgtok::Int: Lex.Lex(); return IntRecTy::get();
- case tgtok::Code: Lex.Lex(); return CodeRecTy::get();
case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
case tgtok::Id:
if (Record *R = ParseClassID()) return RecordRecTy::get(R);
@@ -633,7 +765,7 @@ RecTy *TGParser::ParseType() {
/// IDValue ::= ID [multiclass template argument]
/// IDValue ::= ID [def name]
///
-Init *TGParser::ParseIDValue(Record *CurRec) {
+Init *TGParser::ParseIDValue(Record *CurRec, IDParseMode Mode) {
assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue");
std::string Name = Lex.getCurStrVal();
SMLoc Loc = Lex.getLoc();
@@ -644,14 +776,17 @@ Init *TGParser::ParseIDValue(Record *CurRec) {
/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
/// has already been read.
Init *TGParser::ParseIDValue(Record *CurRec,
- const std::string &Name, SMLoc NameLoc) {
+ const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode) {
if (CurRec) {
if (const RecordVal *RV = CurRec->getValue(Name))
return VarInit::get(Name, RV->getType());
- std::string TemplateArgName = CurRec->getName()+":"+Name;
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name, ":");
+
if (CurMultiClass)
- TemplateArgName = CurMultiClass->Rec.getName()+"::"+TemplateArgName;
+ TemplateArgName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
if (CurRec->isTemplateArg(TemplateArgName)) {
const RecordVal *RV = CurRec->getValue(TemplateArgName);
@@ -661,7 +796,9 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
if (CurMultiClass) {
- std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
+
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
assert(RV && "Template arg doesn't exist??");
@@ -669,11 +806,27 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
}
+ // If this is in a foreach loop, make sure it's not a loop iterator
+ for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
+ i != iend;
+ ++i) {
+ VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ if (IterVar && IterVar->getName() == Name)
+ return IterVar;
+ }
+
+ if (Mode == ParseNameMode)
+ return StringInit::get(Name);
+
if (Record *D = Records.getDef(Name))
return DefInit::get(D);
- Error(NameLoc, "Variable not defined: '" + Name + "'");
- return 0;
+ if (Mode == ParseValueMode) {
+ Error(NameLoc, "Variable not defined: '" + Name + "'");
+ return 0;
+ }
+
+ return StringInit::get(Name);
}
/// ParseOperation - Parse an operator. This returns null on error.
@@ -685,7 +838,6 @@ Init *TGParser::ParseOperation(Record *CurRec) {
default:
TokError("unknown operation");
return 0;
- break;
case tgtok::XHead:
case tgtok::XTail:
case tgtok::XEmpty:
@@ -694,7 +846,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *Type = 0;
switch (Lex.getCode()) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XCast:
Lex.Lex(); // eat the operation
Code = UnOpInit::CAST;
@@ -810,7 +962,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *Type = 0;
switch (OpTok) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
@@ -874,7 +1026,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
tgtok::TokKind LexCode = Lex.getCode();
Lex.Lex(); // eat the operation
switch (LexCode) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XIf:
Code = TernOpInit::IF;
break;
@@ -919,7 +1071,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
Lex.Lex(); // eat the ')'
switch (LexCode) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XIf: {
// FIXME: The `!if' operator doesn't handle non-TypedInit well at
// all. This can be made much more robust.
@@ -989,8 +1141,6 @@ Init *TGParser::ParseOperation(Record *CurRec) {
CurMultiClass);
}
}
- TokError("could not parse operation");
- return 0;
}
/// ParseOperatorType - Parse a type for an operator. This returns
@@ -1041,10 +1191,16 @@ RecTy *TGParser::ParseOperatorType() {
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
///
-Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
+Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
+ IDParseMode Mode) {
Init *R = 0;
switch (Lex.getCode()) {
default: TokError("Unknown token when parsing a value"); break;
+ case tgtok::paste:
+ // This is a leading paste operation. This is deprecated but
+ // still exists in some .td files. Ignore it.
+ Lex.Lex(); // Skip '#'.
+ return ParseSimpleValue(CurRec, ItemType, Mode);
case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
case tgtok::StrVal: {
std::string Val = Lex.getCurStrVal();
@@ -1060,7 +1216,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
break;
}
case tgtok::CodeFragment:
- R = CodeInit::get(Lex.getCurStrVal());
+ R = StringInit::get(Lex.getCurStrVal());
Lex.Lex();
break;
case tgtok::question:
@@ -1071,7 +1227,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
SMLoc NameLoc = Lex.getLoc();
std::string Name = Lex.getCurStrVal();
if (Lex.Lex() != tgtok::less) // consume the Id.
- return ParseIDValue(CurRec, Name, NameLoc); // Value ::= IDValue
+ return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
// Value ::= ID '<' ValueListNE '>'
if (Lex.Lex() == tgtok::greater) {
@@ -1305,8 +1461,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
/// ValueSuffix ::= '[' BitList ']'
/// ValueSuffix ::= '.' ID
///
-Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
- Init *Result = ParseSimpleValue(CurRec, ItemType);
+Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
+ Init *Result = ParseSimpleValue(CurRec, ItemType, Mode);
if (Result == 0) return 0;
// Parse the suffixes now if present.
@@ -1314,6 +1470,10 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
switch (Lex.getCode()) {
default: return Result;
case tgtok::l_brace: {
+ if (Mode == ParseNameMode || Mode == ParseForeachMode)
+ // This is the beginning of the object body.
+ return Result;
+
SMLoc CurlyLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
std::vector<unsigned> Ranges = ParseRangeList();
@@ -1368,6 +1528,56 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
Result = FieldInit::get(Result, Lex.getCurStrVal());
Lex.Lex(); // eat field name
break;
+
+ case tgtok::paste:
+ SMLoc PasteLoc = Lex.getLoc();
+
+ // Create a !strconcat() operation, first casting each operand to
+ // a string if necessary.
+
+ TypedInit *LHS = dynamic_cast<TypedInit *>(Result);
+ if (!LHS) {
+ Error(PasteLoc, "LHS of paste is not typed!");
+ return 0;
+ }
+
+ if (LHS->getType() != StringRecTy::get()) {
+ LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get());
+ }
+
+ TypedInit *RHS = 0;
+
+ Lex.Lex(); // Eat the '#'.
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+
+ // Trailing paste, concat with an empty string.
+ RHS = StringInit::get("");
+ break;
+
+ default:
+ Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+ RHS = dynamic_cast<TypedInit *>(RHSResult);
+ if (!RHS) {
+ Error(PasteLoc, "RHS of paste is not typed!");
+ return 0;
+ }
+
+ if (RHS->getType() != StringRecTy::get()) {
+ RHS = UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get());
+ }
+
+ break;
+ }
+
+ Result = BinOpInit::get(BinOpInit::STRCONCAT, LHS, RHS,
+ StringRecTy::get())->Fold(CurRec, CurMultiClass);
+ break;
}
}
}
@@ -1417,7 +1627,11 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
RecTy *ItemType = EltTy;
unsigned int ArgN = 0;
if (ArgsRec != 0 && EltTy == 0) {
- const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ if (!TArgs.size()) {
+ TokError("template argument provided to non-template class");
+ return std::vector<Init*>();
+ }
const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
if (!RV) {
errs() << "Cannot find template arg " << ArgN << " (" << TArgs[ArgN]
@@ -1434,7 +1648,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
Lex.Lex(); // Eat the comma
if (ArgsRec != 0 && EltTy == 0) {
- const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
TokError("too many template arguments");
return std::vector<Init*>();
@@ -1462,37 +1676,38 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
///
/// Declaration ::= FIELD? Type ID ('=' Value)?
///
-std::string TGParser::ParseDeclaration(Record *CurRec,
+Init *TGParser::ParseDeclaration(Record *CurRec,
bool ParsingTemplateArgs) {
// Read the field prefix if present.
bool HasField = Lex.getCode() == tgtok::Field;
if (HasField) Lex.Lex();
RecTy *Type = ParseType();
- if (Type == 0) return "";
+ if (Type == 0) return 0;
if (Lex.getCode() != tgtok::Id) {
TokError("Expected identifier in declaration");
- return "";
+ return 0;
}
SMLoc IdLoc = Lex.getLoc();
- std::string DeclName = Lex.getCurStrVal();
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
Lex.Lex();
if (ParsingTemplateArgs) {
if (CurRec) {
- DeclName = CurRec->getName() + ":" + DeclName;
+ DeclName = QualifyName(*CurRec, CurMultiClass, DeclName, ":");
} else {
assert(CurMultiClass);
}
if (CurMultiClass)
- DeclName = CurMultiClass->Rec.getName() + "::" + DeclName;
+ DeclName = QualifyName(CurMultiClass->Rec, CurMultiClass, DeclName,
+ "::");
}
// Add the value.
if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
- return "";
+ return 0;
// If a value is present, parse it.
if (Lex.getCode() == tgtok::equal) {
@@ -1501,12 +1716,56 @@ std::string TGParser::ParseDeclaration(Record *CurRec,
Init *Val = ParseValue(CurRec, Type);
if (Val == 0 ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
- return "";
+ return 0;
}
return DeclName;
}
+/// ParseForeachDeclaration - Read a foreach declaration, returning
+/// the name of the declared object or a NULL Init on error. Return
+/// the parsed initializer list through ForeachListValue.
+///
+/// ForeachDeclaration ::= ID '=' Value
+///
+Init *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("Expected identifier in foreach declaration");
+ return 0;
+ }
+
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+
+ // If a value is present, parse it.
+ if (Lex.getCode() != tgtok::equal) {
+ TokError("Expected '=' in foreach declaration");
+ return 0;
+ }
+ Lex.Lex(); // Eat the '='
+
+ // Expect a list initializer.
+ ForeachListValue = ParseValue(0, 0, ParseForeachMode);
+
+ TypedInit *TypedList = dynamic_cast<TypedInit *>(ForeachListValue);
+ if (TypedList == 0) {
+ TokError("Value list is untyped");
+ return 0;
+ }
+
+ RecTy *ValueType = TypedList->getType();
+ ListRecTy *ListType = dynamic_cast<ListRecTy *>(ValueType);
+ if (ListType == 0) {
+ TokError("Value list is not of list type");
+ return 0;
+ }
+
+ RecTy *IterType = ListType->getElementType();
+ VarInit *IterVar = VarInit::get(DeclName, IterType);
+
+ return IterVar;
+}
+
/// ParseTemplateArgList - Read a template argument list, which is a non-empty
/// sequence of template-declarations in <>'s. If CurRec is non-null, these are
/// template args for a def, which may or may not be in a multiclass. If null,
@@ -1521,8 +1780,8 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
Record *TheRecToAddTo = CurRec ? CurRec : &CurMultiClass->Rec;
// Read the first declaration.
- std::string TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg.empty())
+ Init *TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg == 0)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
@@ -1532,7 +1791,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
// Read the following declarations.
TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg.empty())
+ if (TemplArg == 0)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
}
@@ -1550,7 +1809,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
bool TGParser::ParseBodyItem(Record *CurRec) {
if (Lex.getCode() != tgtok::Let) {
- if (ParseDeclaration(CurRec, false).empty())
+ if (ParseDeclaration(CurRec, false) == 0)
return true;
if (Lex.getCode() != tgtok::semi)
@@ -1671,22 +1930,24 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
Lex.Lex(); // Eat the 'def' token.
// Parse ObjectName and make a record for it.
- Record *CurRec = new Record(ParseObjectName(), DefLoc, Records);
+ Record *CurRec = new Record(ParseObjectName(CurMultiClass), DefLoc, Records);
if (!CurMultiClass) {
// Top-level def definition.
// Ensure redefinition doesn't happen.
- if (Records.getDef(CurRec->getName())) {
- Error(DefLoc, "def '" + CurRec->getName() + "' already defined");
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString()
+ + "' already defined");
return true;
}
Records.addDef(CurRec);
} else {
// Otherwise, a def inside a multiclass, add it to the multiclass.
for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName()) {
- Error(DefLoc, "def '" + CurRec->getName() +
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit()) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
return true;
}
@@ -1707,7 +1968,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
if (CurMultiClass) {
// Copy the template arguments for the multiclass into the def.
- const std::vector<std::string> &TArgs =
+ const std::vector<Init *> &TArgs =
CurMultiClass->Rec.getTemplateArgs();
for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
@@ -1717,6 +1978,63 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
}
}
+ if (ProcessForeachDefs(CurRec, CurMultiClass, DefLoc)) {
+ Error(DefLoc,
+ "Could not process loops for def" + CurRec->getNameInitAsString());
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseForeach - Parse a foreach statement. This returns true on error.
+///
+/// Foreach ::= FOREACH Declaration IN '{' ObjectList '}'
+/// Foreach ::= FOREACH Declaration IN Object
+///
+bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Foreach && "Unknown tok");
+ Lex.Lex(); // Eat the 'foreach' token.
+
+ // Make a temporary object to record items associated with the
+ // foreach loop.
+ Init *ListValue = 0;
+ Init *IterName = ParseForeachDeclaration(ListValue);
+ if (IterName == 0)
+ return TokError("expected declaration in for");
+
+ if (Lex.getCode() != tgtok::In)
+ return TokError("Unknown tok");
+ Lex.Lex(); // Eat the in
+
+ // Create a loop object and remember it.
+ Loops.push_back(ForeachLoop(IterName, ListValue));
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ // FOREACH Declaration IN Object
+ if (ParseObject(CurMultiClass))
+ return true;
+ }
+ else {
+ SMLoc BraceLoc = Lex.getLoc();
+ // Otherwise, this is a group foreach.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of foreach command");
+ return Error(BraceLoc, "to match this '{'");
+ }
+ Lex.Lex(); // Eat the }
+ }
+
+ // We've processed everything in this loop.
+ Loops.pop_back();
+
return false;
}
@@ -1734,10 +2052,11 @@ bool TGParser::ParseClass() {
Record *CurRec = Records.getClass(Lex.getCurStrVal());
if (CurRec) {
// If the body was previously defined, this is an error.
- if (!CurRec->getValues().empty() ||
+ if (CurRec->getValues().size() > 1 || // Account for NAME.
!CurRec->getSuperClasses().empty() ||
!CurRec->getTemplateArgs().empty())
- return TokError("Class '" + CurRec->getName() + "' already defined");
+ return TokError("Class '" + CurRec->getNameInitAsString()
+ + "' already defined");
} else {
// If this is the first reference to this class, create and add it.
CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records);
@@ -1909,6 +2228,7 @@ bool TGParser::ParseMultiClass() {
case tgtok::Let:
case tgtok::Def:
case tgtok::Defm:
+ case tgtok::Foreach:
if (ParseObject(CurMultiClass))
return true;
break;
@@ -1924,23 +2244,31 @@ bool TGParser::ParseMultiClass() {
Record *TGParser::
InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
- const std::string &DefmPrefix,
+ Init *DefmPrefix,
SMLoc DefmPrefixLoc) {
+ // We need to preserve DefProto so it can be reused for later
+ // instantiations, so create a new Record to inherit from it.
+
// Add in the defm name. If the defm prefix is empty, give each
// instantiated def a unique name. Otherwise, if "#NAME#" exists in the
// name, substitute the prefix for #NAME#. Otherwise, use the defm name
// as a prefix.
- std::string DefName = DefProto->getName();
- if (DefmPrefix.empty()) {
- DefName = GetNewAnonymousName();
- } else {
- std::string::size_type idx = DefName.find("#NAME#");
- if (idx != std::string::npos) {
- DefName.replace(idx, 6, DefmPrefix);
- } else {
- // Add the suffix to the defm name to get the new name.
- DefName = DefmPrefix + DefName;
- }
+
+ if (DefmPrefix == 0)
+ DefmPrefix = StringInit::get(GetNewAnonymousName());
+
+ Init *DefName = DefProto->getNameInit();
+
+ StringInit *DefNameString = dynamic_cast<StringInit *>(DefName);
+
+ if (DefNameString != 0) {
+ // We have a fully expanded string so there are no operators to
+ // resolve. We should concatenate the given prefix and name.
+ DefName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ UnOpInit::get(UnOpInit::CAST, DefmPrefix,
+ StringRecTy::get())->Fold(DefProto, &MC),
+ DefName, StringRecTy::get())->Fold(DefProto, &MC);
}
Record *CurRec = new Record(DefName, DefmPrefixLoc, Records);
@@ -1950,6 +2278,41 @@ InstantiateMulticlassDef(MultiClass &MC,
Ref.Rec = DefProto;
AddSubClass(CurRec, Ref);
+ if (DefNameString == 0) {
+ // We must resolve references to NAME.
+ if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(),
+ DefmPrefix)) {
+ Error(DefmPrefixLoc, "Could not resolve "
+ + CurRec->getNameInitAsString() + ":NAME to '"
+ + DefmPrefix->getAsUnquotedString() + "'");
+ return 0;
+ }
+
+ RecordVal *DefNameRV = CurRec->getValue("NAME");
+ CurRec->resolveReferencesTo(DefNameRV);
+ }
+
+ if (!CurMultiClass) {
+ // We do this after resolving NAME because before resolution, many
+ // multiclass defs will have the same name expression. If we are
+ // currently in a multiclass, it means this defm appears inside a
+ // multiclass and its name won't be fully resolvable until we see
+ // the top-level defm. Therefore, we don't add this to the
+ // RecordKeeper at this point. If we did we could get duplicate
+ // defs as more than one probably refers to NAME or some other
+ // common internal placeholder.
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefmPrefixLoc, "def '" + CurRec->getNameInitAsString() +
+ "' already defined, instantiating defm with subdef '" +
+ DefProto->getNameInitAsString() + "'");
+ return 0;
+ }
+
+ Records.addDef(CurRec);
+ }
+
return CurRec;
}
@@ -1957,7 +2320,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
Record *CurRec,
SMLoc DefmPrefixLoc,
SMLoc SubClassLoc,
- const std::vector<std::string> &TArgs,
+ const std::vector<Init *> &TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs) {
// Loop over all of the template arguments, setting them to the specified
@@ -1979,8 +2342,9 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClassLoc, "value not specified for template argument #"+
- utostr(i) + " (" + TArgs[i] + ") of multiclassclass '" +
- MC.Rec.getName() + "'");
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of multiclassclass '" + MC.Rec.getNameInitAsString()
+ + "'");
}
}
return false;
@@ -1997,25 +2361,20 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
LetStack[i][j].Bits, LetStack[i][j].Value))
return Error(DefmPrefixLoc, "when instantiating this defm");
- // Ensure redefinition doesn't happen.
- if (Records.getDef(CurRec->getName()))
- return Error(DefmPrefixLoc, "def '" + CurRec->getName() +
- "' already defined, instantiating defm with subdef '" +
- DefProto->getName() + "'");
-
// Don't create a top level definition for defm inside multiclasses,
// instead, only update the prototypes and bind the template args
// with the new created definition.
if (CurMultiClass) {
for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName())
- return Error(DefmPrefixLoc, "defm '" + CurRec->getName() +
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit())
+ return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
CurMultiClass->DefPrototypes.push_back(CurRec);
// Copy the template arguments for the multiclass into the new def.
- const std::vector<std::string> &TA =
+ const std::vector<Init *> &TA =
CurMultiClass->Rec.getTemplateArgs();
for (unsigned i = 0, e = TA.size(); i != e; ++i) {
@@ -2023,8 +2382,6 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
assert(RV && "Template arg doesn't exist?");
CurRec->addValue(*RV);
}
- } else {
- Records.addDef(CurRec);
}
return false;
@@ -2037,10 +2394,10 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
- std::string DefmPrefix;
+ Init *DefmPrefix = 0;
+
if (Lex.Lex() == tgtok::Id) { // eat the defm.
- DefmPrefix = Lex.getCurStrVal();
- Lex.Lex(); // Eat the defm prefix.
+ DefmPrefix = ParseObjectName(CurMultiClass);
}
SMLoc DefmPrefixLoc = Lex.getLoc();
@@ -2070,7 +2427,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
// Verify that the correct number of template arguments were specified.
- const std::vector<std::string> &TArgs = MC->Rec.getTemplateArgs();
+ const std::vector<Init *> &TArgs = MC->Rec.getTemplateArgs();
if (TArgs.size() < TemplateVals.size())
return Error(SubClassLoc,
"more template args specified than multiclass expects");
@@ -2080,6 +2437,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
Record *DefProto = MC->DefPrototypes[i];
Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc);
+ if (!CurRec)
+ return true;
if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
@@ -2165,6 +2524,7 @@ bool TGParser::ParseObject(MultiClass *MC) {
return TokError("Expected class, def, defm, multiclass or let definition");
case tgtok::Let: return ParseTopLevelLet(MC);
case tgtok::Def: return ParseDef(MC);
+ case tgtok::Foreach: return ParseForeach(MC);
case tgtok::Defm: return ParseDefm(MC);
case tgtok::Class: return ParseClass();
case tgtok::MultiClass: return ParseMultiClass();
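Taken together, the parser changes above accept the new foreach statement and the in-name '#' paste operator. A minimal, hypothetical .td sketch of syntax that now parses (the Reg class and register names are illustrative, not taken from this patch):

    // foreach iterates a list value; '#' pastes the iterator into names and
    // string values, with non-string operands cast to string automatically.
    class Reg<string n> {
      string Name = n;
    }
    foreach i = [0, 1, 2, 3] in
      def R#i : Reg<"R"#i>;    // instantiates R0..R3 with Name "R0".."R3"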
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index db8a62029746..b8e7cb1929bb 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -14,6 +14,7 @@
#ifndef TGPARSER_H
#define TGPARSER_H
+#include "llvm/TableGen/Record.h"
#include "TGLexer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
@@ -41,17 +42,44 @@ namespace llvm {
}
};
+ /// ForeachLoop - Record the iteration state associated with a for loop.
+ /// This is used to instantiate items in the loop body.
+ struct ForeachLoop {
+ Init *IterVar;
+ Init *ListValue;
+
+ ForeachLoop(Init *IVar, Init *LValue) : IterVar(IVar), ListValue(LValue) {}
+ };
+
class TGParser {
TGLexer Lex;
std::vector<std::vector<LetRecord> > LetStack;
std::map<std::string, MultiClass*> MultiClasses;
+ /// Loops - Keep track of any foreach loops we are within.
+ ///
+ typedef std::vector<ForeachLoop> LoopVector;
+ LoopVector Loops;
+
/// CurMultiClass - If we are parsing a 'multiclass' definition, this is the
/// current value.
MultiClass *CurMultiClass;
// Record tracker
RecordKeeper &Records;
+
+ // A "named boolean" indicating how to parse identifiers. Usually
+ // identifiers map to some existing object but in special cases
+ // (e.g. parsing def names) no such object exists yet because we are
+ // in the middle of creating it. For those situations, allow the
+ // parser to ignore missing object errors.
+ enum IDParseMode {
+ ParseValueMode, // We are parsing a value we expect to look up.
+ ParseNameMode, // We are parsing a name of an object that does not yet
+ // exist.
+ ParseForeachMode // We are parsing a foreach init.
+ };
+
public:
TGParser(SourceMgr &SrcMgr, RecordKeeper &records) :
Lex(SrcMgr), CurMultiClass(0), Records(records) {}
@@ -70,14 +98,36 @@ public:
const std::vector<std::string> &getDependencies() const {
return Lex.getDependencies();
}
+
private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
- bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V);
+ bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ const std::vector<unsigned> &BitList, Init *V) {
+ return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
+ }
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,
SubMultiClassReference &SubMultiClass);
+ // IterRecord: Map an iterator name to a value.
+ struct IterRecord {
+ Init *IterVar;
+ Init *IterValue;
+ IterRecord(Init *Var, Init *Val) : IterVar(Var), IterValue(Val) {}
+ };
+
+ // IterSet: The set of all iterator values at some point in the
+ // iteration space.
+ typedef std::vector<IterRecord> IterSet;
+
+ bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc);
+ bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc, IterSet &IterVals, ForeachLoop &CurLoop,
+ LoopVector::iterator NextLoop);
+
private: // Parser methods.
bool ParseObjectList(MultiClass *MC = 0);
bool ParseObject(MultiClass *MC);
@@ -85,13 +135,13 @@ private: // Parser methods.
bool ParseMultiClass();
Record *InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
- const std::string &DefmPrefix,
+ Init *DefmPrefix,
SMLoc DefmPrefixLoc);
bool ResolveMulticlassDefArgs(MultiClass &MC,
Record *DefProto,
SMLoc DefmPrefixLoc,
SMLoc SubClassLoc,
- const std::vector<std::string> &TArgs,
+ const std::vector<Init *> &TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs);
bool ResolveMulticlassDef(MultiClass &MC,
@@ -100,6 +150,7 @@ private: // Parser methods.
SMLoc DefmPrefixLoc);
bool ParseDefm(MultiClass *CurMultiClass);
bool ParseDef(MultiClass *CurMultiClass);
+ bool ParseForeach(MultiClass *CurMultiClass);
bool ParseTopLevelLet(MultiClass *CurMultiClass);
std::vector<LetRecord> ParseLetList();
@@ -108,15 +159,19 @@ private: // Parser methods.
bool ParseBodyItem(Record *CurRec);
bool ParseTemplateArgList(Record *CurRec);
- std::string ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+ Init *ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+ Init *ParseForeachDeclaration(Init *&ForeachListValue);
SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm);
SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
- Init *ParseIDValue(Record *CurRec);
- Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc);
- Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0);
- Init *ParseValue(Record *CurRec, RecTy *ItemType = 0);
+ Init *ParseIDValue(Record *CurRec, IDParseMode Mode = ParseValueMode);
+ Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0, RecTy *EltTy = 0);
std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
@@ -126,7 +181,7 @@ private: // Parser methods.
RecTy *ParseType();
Init *ParseOperation(Record *CurRec);
RecTy *ParseOperatorType();
- std::string ParseObjectName();
+ Init *ParseObjectName(MultiClass *CurMultiClass);
Record *ParseClassID();
MultiClass *ParseMultiClassID();
Record *ParseDefmID();
diff --git a/lib/TableGen/TableGenAction.cpp b/lib/TableGen/TableGenAction.cpp
new file mode 100644
index 000000000000..54e508309457
--- /dev/null
+++ b/lib/TableGen/TableGenAction.cpp
@@ -0,0 +1,15 @@
+//===- TableGenAction.cpp - defines TableGenAction --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/TableGenAction.h"
+
+using namespace llvm;
+
+void TableGenAction::anchor() { }
+
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 29588db324cf..09bcc7a5b53e 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -15,7 +15,9 @@
#include "llvm/TableGen/Record.h"
using namespace llvm;
-void TableGenBackend::EmitSourceFileHeader(const std::string &Desc,
+void TableGenBackend::anchor() { }
+
+void TableGenBackend::EmitSourceFileHeader(StringRef Desc,
raw_ostream &OS) const {
OS << "//===- TableGen'erated file -------------------------------------*-"
" C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate"
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 16d0da3b8ac7..2a1e8e4d3079 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,4 +1,4 @@
-//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 5c727ad6e343..9b0cb0c9e575 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
- "Native client mode">;
-
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -35,6 +32,9 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -86,6 +86,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+// Some processors perform return stack prediction. CodeGen should avoid
+// issuing "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Has return address stack">;
+
/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
"Supports v7 DSP instructions in Thumb2">;
@@ -201,13 +206,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9-mp", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP]>;
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index ea3319fb0e03..410790a7baa0 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,10 +13,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARM.h"
#include "ARMBuildAttrs.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
@@ -35,7 +34,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -44,10 +42,7 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,15 +80,15 @@ namespace {
void EmitTextAttribute(unsigned Attribute, StringRef String) {
switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
break;
/* GAS requires .fpu to be emitted regardless of EABI attribute */
case ARMBuildAttrs::Advanced_SIMD_arch:
case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
break;
- default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
void Finish() { }
@@ -197,15 +192,14 @@ namespace {
AttributeItemType item = Contents[i];
Streamer.EmitULEB128IntValue(item.Tag, 0);
switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
Streamer.EmitULEB128IntValue(item.IntValue, 0);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitBytes(item.StringValue.upper(), 0);
Streamer.EmitIntValue(0, 1); // '\0'
break;
- default:
- assert(0 && "Invalid attribute type");
}
}
@@ -300,6 +294,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ assert(Size && "C++ constructor pointer had zero size!");
+
+ const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+ assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+ const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ (Subtarget->isTargetDarwin()
+ ? MCSymbolRefExpr::VK_None
+ : MCSymbolRefExpr::VK_ARM_TARGET1),
+ OutContext);
+
+ OutStreamer.EmitValue(E, Size);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -316,8 +326,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned TF = MO.getTargetFlags();
switch (MO.getType()) {
- default:
- assert(0 && "<unknown operand type>");
+ default: llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
@@ -494,11 +503,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
@@ -576,10 +595,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
- if (Subtarget->isTargetELF()) {
-
+ if (Subtarget->isTargetELF())
emitAttributes();
- }
}
@@ -710,15 +727,26 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/vfpv3/vfpv2 for .fpu parameters */
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasVFP4())
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ "neon-vfpv4");
+ else
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
/* If emitted for NEON, omit from VFP below, since you can have both
* NEON and VFP in build attributes but only one .fpu */
emitFPU = false;
}
+ /* VFPv4 + .fpu */
+ if (Subtarget->hasVFP4()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
/* VFPv3 + .fpu */
- if (Subtarget->hasVFP3()) {
+ } else if (Subtarget->hasVFP3()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
ARMBuildAttrs::AllowFPv3A);
if (emitFPU)
@@ -740,14 +768,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -760,7 +788,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -808,7 +836,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
@@ -816,7 +843,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
}
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("Invalid ARMCPModifier!");
}
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
@@ -1070,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1084,7 +1111,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
@@ -1099,6 +1126,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
+ case ARM::t2STR_PRE:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
RegList.push_back(SrcReg);
@@ -1112,14 +1140,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
+ case ARM::tMOVr:
Offset = 0;
break;
case ARM::ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
+ case ARM::t2SUBri:
Offset = MI->getOperand(2).getImm();
break;
case ARM::tSUBspi:
@@ -1157,16 +1187,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
OutStreamer.EmitPad(Offset);
} else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
} else if (DstReg == ARM::SP) {
// FIXME: .movsp goes here
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
}
}
@@ -1195,7 +1225,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Check for manual lowerings.
unsigned Opc = MI->getOpcode();
switch (Opc) {
- case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
case ARM::DBG_VALUE: {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
SmallString<128> TmpStr;
@@ -1237,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
- case ARM::BXr9_CALL:
case ARM::BX_CALL: {
{
MCInst TmpInst;
@@ -1259,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::tBXr9_CALL:
case ARM::tBX_CALL: {
{
MCInst TmpInst;
@@ -1282,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::BMOVPCRXr9_CALL:
case ARM::BMOVPCRX_CALL: {
{
MCInst TmpInst;
@@ -1310,6 +1337,58 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
+ case ARM::BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::Bcc);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::t2BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2B);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
case ARM::MOVi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
@@ -1482,11 +1561,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1898,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.setOpcode(ARM::tLDRi);
TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
TmpInst.addOperand(MCOperand::CreateReg(0));
@@ -1935,4 +2013,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7741fc4b34e8..af3f75a0e892 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -73,6 +73,7 @@ public:
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ void EmitXXStructor(const Constant *CV);
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -106,7 +107,7 @@ public:
if (!Subtarget->isTargetDarwin())
return 0;
return Subtarget->isThumb() ?
- llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 408edfc20d4c..c6280f819a4f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -147,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -157,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned OffImm = MI->getOperand(NumOps-2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
switch (AddrMode) {
- default:
- assert(false && "Unknown indexed op!");
- return NULL;
+ default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
@@ -440,6 +437,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -490,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
- // FIXME: This confuses implicit_def with optional CPSR def.
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
- return false;
-
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+ (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
Found = true;
}
@@ -511,11 +520,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -544,83 +552,95 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
if (MCID.getSize())
return MCID.getSize();
- // If this machine instr is an inline asm, measure it.
- if (MI->getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
- if (MI->isLabel())
- return 0;
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
unsigned Opc = MI->getOpcode();
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::PROLOG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case ARM::MOVi16_ga_pcrel:
- case ARM::MOVTi16_ga_pcrel:
- case ARM::t2MOVi16_ga_pcrel:
- case ARM::t2MOVTi16_ga_pcrel:
- return 4;
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm:
- return 8;
- case ARM::CONSTPOOL_ENTRY:
- // If this machine instr is a constant pool entry, its size is recorded as
- // operand #2.
- return MI->getOperand(2).getImm();
- case ARM::Int_eh_sjlj_longjmp:
- return 16;
- case ARM::tInt_eh_sjlj_longjmp:
- return 10;
- case ARM::Int_eh_sjlj_setjmp:
- case ARM::Int_eh_sjlj_setjmp_nofp:
- return 20;
- case ARM::tInt_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 12;
- case ARM::BR_JTr:
- case ARM::BR_JTm:
- case ARM::BR_JTadd:
- case ARM::tBR_JTr:
- case ARM::t2BR_JT:
- case ARM::t2TBB_JT:
- case ARM::t2TBH_JT: {
- // These are jumptable branches, i.e. a branch followed by an inlined
- // jumptable. The size is 4 + 4 * number of entries. For TBB, each
- // entry is one byte; TBH two byte each.
- unsigned EntrySize = (Opc == ARM::t2TBB_JT)
- ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
- unsigned NumOps = MCID.getNumOperands();
- MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
- unsigned JTI = JTOP.getIndex();
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != 0);
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- assert(JTI < JT.size());
- // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
- // 4 aligned. The assembler / linker may add 2 byte padding just before
- // the JT entries. The size does not include this padding; the
- // constant islands pass does separate bookkeeping for it.
- // FIXME: If we know the size of the function is less than (1 << 16) *2
- // bytes, we can use 16-bit entries instead. Then there won't be an
- // alignment issue.
- unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = getNumJTEntries(JT, JTI);
- if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
- // Make sure the instruction that follows TBB is 2-byte aligned.
- // FIXME: Constant island pass should insert an "ALIGN" instruction
- // instead.
- ++NumEntries;
- return NumEntries * EntrySize + InstSize;
- }
- default:
- // Otherwise, pseudo-instruction sizes are zero.
- return 0;
- }
- return 0; // Not reached
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH two bytes each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+}
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -660,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
- ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ // Handle register classes that require multiple instructions.
+ unsigned BeginIdx = 0;
+ unsigned SubRegs = 0;
+ unsigned Spacing = 1;
+
+ // Use VORRq when possible.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ // Fall back to VMOVD.
+ else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
+ else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
+ else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+
+ else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
+ else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
+ else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+
+ if (Opc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
- unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
- ARM::qsub_1 : ARM::qsub_3;
- for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, i);
- unsigned Src = TRI->getSubReg(SrcReg, i);
- MachineInstrBuilder Mov =
- AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
- .addReg(Dst, RegState::Define)
- .addReg(Src, getKillRegState(KillSrc))
- .addReg(Src, getKillRegState(KillSrc)));
- if (i == EndSubReg) {
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- }
+ MachineInstrBuilder Mov;
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+ Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src));
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
}
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
return;
}
+
llvm_unreachable("Impossible reg-to-reg copy");
}
@@ -710,8 +752,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
@@ -738,9 +779,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
@@ -825,7 +867,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VST1q64Pseudo:
+ case ARM::VST1q64:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -847,7 +889,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -862,7 +904,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
@@ -887,9 +929,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
@@ -911,11 +953,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -926,15 +969,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -971,7 +1015,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q64:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -993,7 +1037,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1359,7 +1403,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1380,7 +1424,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
- if (MI->definesRegister(ARM::SP))
+ // Calls don't actually change the stack pointer, even if they have imp-defs.
+ // No ARM calling conventions change the stack pointer. (X86 calling
+ // conventions sometimes do).
+ if (!MI->isCall() && MI->definesRegister(ARM::SP))
return true;
return false;
@@ -1445,15 +1492,37 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
int llvm::getMatchingCondBranchOpcode(int Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
- else if (Opc == ARM::tB)
+ if (Opc == ARM::tB)
return ARM::tBcc;
- else if (Opc == ARM::t2B)
- return ARM::t2Bcc;
+ if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
llvm_unreachable("Unknown unconditional branch opcode!");
- return 0;
}
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case ARM::MOVCCr:
+ case ARM::t2MOVCCr: {
+ // MOVCC can be commuted by inverting the condition.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+ // MOVCC AL can't be inverted. Shouldn't happen.
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return NULL;
+ MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ if (!MI)
+ return NULL;
+ // After swapping the MOVCC operands, also invert the condition.
+ MI->getOperand(MI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return MI;
+ }
+ }
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
@@ -1478,7 +1547,6 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::SUBSrsr, ARM::SUBrsr},
{ARM::RSBSri, ARM::RSBri},
- {ARM::RSBSrr, ARM::RSBrr},
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
@@ -1625,7 +1693,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += InstrOffs * Scale;
@@ -1765,8 +1832,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1777,6 +1843,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
+ return false;
if (!MO.isReg()) continue;
// This instruction modifies or uses CPSR after the one we want to
@@ -1838,6 +1906,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands();
!isSafe && IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
if (!MO.isReg() || MO.getReg() != ARM::CPSR)
continue;
if (MO.isDef()) {
@@ -1889,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
if (!MRI->hasOneNonDBGUse(Reg))
return false;
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (DefMCID.hasOptionalDef()) {
+ unsigned NumOps = DefMCID.getNumOperands();
+ const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ if (MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If DefMI defines CPSR and it is not dead, it's obviously not safe
+ // to delete DefMI.
+ return false;
+ }
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ if (UseMCID.hasOptionalDef()) {
+ unsigned NumOps = UseMCID.getNumOperands();
+ if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ // If the instruction sets the flag, do not attempt this optimization
+ // since it may change the semantics of the code.
+ return false;
+ }
+
unsigned UseOpc = UseMI->getOpcode();
unsigned NewUseOpc = 0;
uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
@@ -1960,7 +2051,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -1988,7 +2079,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
switch (Opc) {
default:
llvm_unreachable("Unexpected multi-uops instruction!");
- break;
case ARM::VLDMQIA:
case ARM::VSTMQIA:
return 2;
@@ -2335,6 +2425,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2343,35 +2486,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2396,28 +2581,38 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2425,7 +2620,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
- case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
case ARM::VLD3q8_UPD:
case ARM::VLD3q16_UPD:
case ARM::VLD3q32_UPD:
@@ -2436,22 +2632,29 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8_UPD:
case ARM::VLD4d16_UPD:
case ARM::VLD4d32_UPD:
- case ARM::VLD1d64Q_UPD:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
case ARM::VLD4q8_UPD:
case ARM::VLD4q16_UPD:
case ARM::VLD4q32_UPD:
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
- case ARM::VLD2DUPd8_UPD:
- case ARM::VLD2DUPd16_UPD:
- case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8:
case ARM::VLD4DUPd16:
case ARM::VLD4DUPd32:
@@ -2559,26 +2762,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (DefAlign < 8 && Subtarget.isCortexA9())
switch (DefMCID.getOpcode()) {
default: break;
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2586,7 +2799,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -2603,7 +2815,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -2613,18 +2824,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
@@ -2666,6 +2883,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2676,10 +2906,21 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
@@ -2828,3 +3069,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// This will go before any implicit ops.
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
+
+bool ARMBaseInstrInfo::hasNOP() const {
+ return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f32179a31..2fe85072a330 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,6 +35,9 @@ protected:
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
public:
+ // Return whether the target has an explicit NOP encoding.
+ bool hasNOP() const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -69,10 +72,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -139,6 +139,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+ MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
@@ -213,12 +215,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 154f1f8ff997..3907f7535260 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -61,41 +60,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
BasePtr(ARM::R6) {
}
-const unsigned*
+const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- static const unsigned CalleeSavedRegs[] = {
- ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
- ARM::R7, ARM::R6, ARM::R5, ARM::R4,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned DarwinCalleeSavedRegs[] = {
- // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
- // register.
- ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
- ARM::R11, ARM::R10, ARM::R8,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned GhcCalleeSavedRegs[] = {
- 0
- };
+ return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+}
- return ghcCall ? GhcCalleeSavedRegs :
- STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
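getCalleeSavedRegs and getCallPreservedMask now return tablegen-generated tables (CSR_*_SaveList and CSR_*_RegMask) instead of hand-written arrays. The mask is a packed bit vector with one bit per physical register; a set bit marks a register whose value survives the call. A self-contained sketch of building and querying such a mask (register numbers are made up for the example):

// Sketch of a call-preserved register mask: one bit per physical register,
// set = preserved across the call, clear = may be clobbered.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint32_t> makeMask(unsigned NumRegs,
                               const std::vector<unsigned> &Preserved) {
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
  for (unsigned R : Preserved)
    Mask[R / 32] |= 1u << (R % 32);
  return Mask;
}

bool isPreserved(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return Mask[Reg / 32] & (1u << (Reg % 32));
}

int main() {
  // Pretend registers 4..11 and 14 (LR) are callee-saved.
  std::vector<unsigned> CSRs = {4, 5, 6, 7, 8, 9, 10, 11, 14};
  std::vector<uint32_t> Mask = makeMask(/*NumRegs=*/32, CSRs);
  std::cout << isPreserved(Mask, 7) << '\n'; // 1: callee-saved
  std::cout << isPreserved(Mask, 0) << '\n'; // 0: argument register, clobbered
}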
BitVector ARMBaseRegisterInfo::
@@ -148,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return false;
}
-const TargetRegisterClass *
-ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned SubIdx) const {
- switch (SubIdx) {
- default: return 0;
- case ARM::ssub_0:
- case ARM::ssub_1:
- case ARM::ssub_2:
- case ARM::ssub_3: {
- // S sub-registers.
- if (A->getSize() == 8) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::DPR_8RegClass;
- assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
- if (A == &ARM::DPR_8RegClass)
- return A;
- return &ARM::DPR_VFP2RegClass;
- }
-
- if (A->getSize() == 16) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::QPR_8RegClass;
- return &ARM::QPR_VFP2RegClass;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::SPR_8RegClass)
- return 0; // Do not allow coalescing!
- return &ARM::QQPR_VFP2RegClass;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- return 0; // Do not allow coalescing!
- }
- case ARM::dsub_0:
- case ARM::dsub_1:
- case ARM::dsub_2:
- case ARM::dsub_3: {
- // D sub-registers.
- if (A->getSize() == 16) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B != &ARM::DPRRegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
- case ARM::dsub_4:
- case ARM::dsub_5:
- case ARM::dsub_6:
- case ARM::dsub_7: {
- // D sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::DPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
-
- case ARM::qsub_0:
- case ARM::qsub_1: {
- // Q sub-registers.
- if (A->getSize() == 32) {
- if (B == &ARM::QPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::QPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- case ARM::qsub_2:
- case ARM::qsub_3: {
- // Q sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- }
- return 0;
-}
-
bool
ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
@@ -416,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// getRawAllocationOrder - Returns the register allocation order for a
/// specified register class with a target-dependent hint.
-ArrayRef<unsigned>
+ArrayRef<uint16_t>
ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const {
@@ -425,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
// of register pairs.
// No FP, R9 is available.
- static const unsigned GPREven1[] = {
+ static const uint16_t GPREven1[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd1[] = {
+ static const uint16_t GPROdd1[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R7, R9 is available.
- static const unsigned GPREven2[] = {
+ static const uint16_t GPREven2[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd2[] = {
+ static const uint16_t GPROdd2[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R11, R9 is available.
- static const unsigned GPREven3[] = {
+ static const uint16_t GPREven3[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9
};
- static const unsigned GPROdd3[] = {
+ static const uint16_t GPROdd3[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
ARM::R8
};
// No FP, R9 is not available.
- static const unsigned GPREven4[] = {
+ static const uint16_t GPREven4[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd4[] = {
+ static const uint16_t GPROdd4[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R7, R9 is not available.
- static const unsigned GPREven5[] = {
+ static const uint16_t GPREven5[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd5[] = {
+ static const uint16_t GPROdd5[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R11, R9 is not available.
- static const unsigned GPREven6[] = {
+ static const uint16_t GPREven6[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
};
- static const unsigned GPROdd6[] = {
+ static const uint16_t GPROdd6[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
};
@@ -610,11 +484,15 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
if (!EnableBasePointer)
return false;
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ // When outgoing call frames are so large that we adjust the stack pointer
+ // around the call, we can no longer use the stack pointer to reach the
+ // emergency spill slot.
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -638,14 +516,29 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't realign the stack if:
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
- (!MFI->hasVarSizedObjects() || EnableBasePointer));
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ return true;
+ if (!EnableBasePointer)
+ return false;
+ // A base pointer is required and allowed. Check that it isn't too late to
+ // reserve it.
+ return MRI->canReserveReg(BasePtr);
}
bool ARMBaseRegisterInfo::
@@ -653,7 +546,7 @@ needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
@@ -662,7 +555,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -679,12 +572,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
@@ -892,7 +783,7 @@ int64_t ARMBaseRegisterInfo::
getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- int64_t InstrOffs = 0;;
+ int64_t InstrOffs = 0;
int Scale = 1;
unsigned ImmIdx = 0;
switch (AddrMode) {
@@ -933,7 +824,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
return InstrOffs * Scale;
@@ -1129,7 +1019,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
break;
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += getFrameIndexInstrOffset(MI, i);
@@ -1171,6 +1060,21 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(TFI->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index fee17ff3c1ca..af7935147e48 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,7 +35,7 @@ namespace ARMRI {
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R0: case R1: case R2: case R3:
@@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
case LR: case SP: case PC:
return true;
case R8: case R9: case R10: case R11:
- // For darwin we want r7 and lr to be next to each other.
- return !isDarwin;
+ // For iOS we want r7 and lr to be next to each other.
+ return !isIOS;
default:
return false;
}
}
-static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R8: case R9: case R10: case R11:
- // Darwin has this second area.
- return isDarwin;
+ // iOS has this second area.
+ return isIOS;
default:
return false;
}
}
-static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case D15: case D14: case D13: case D12:
@@ -94,17 +94,11 @@ protected:
public:
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// getMatchingSuperRegClass - Return a subclass of the specified register
- /// class A so that each register in it has a sub-register of the
- /// specified sub-register index which is in the specified register class B.
- virtual const TargetRegisterClass *
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const;
-
/// canCombineSubRegIndices - Given a register class and a list of
/// subregister indices, return true if it's possible to combine the
/// subregister indices into one that corresponds to a larger
@@ -125,7 +119,7 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index 69eddf03ec94..11bd6a4a8dbc 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -1,4 +1,4 @@
-//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index ff7db1ff62ed..0bd1c3ee2feb 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,4 +1,4 @@
-//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,12 @@
#ifndef ARMCALLINGCONV_H
#define ARMCALLINGCONV_H
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
@@ -29,7 +28,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +71,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -118,8 +117,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 47b2e9829834..d33364bb2871 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,7 +25,7 @@ def CC_ARM_APCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -43,6 +43,7 @@ def CC_ARM_APCS : CallingConv<[
]>;
def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
// Handle all vector types as either f64 or v2f64.
@@ -82,25 +83,6 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
-//===----------------------------------------------------------------------===//
-// ARM APCS Calling Convention for GHC
-//===----------------------------------------------------------------------===//
-
-def CC_ARM_APCS_GHC : CallingConv<[
- // Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
-
- CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
- CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
- CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
-
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
- CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
-]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -108,7 +90,7 @@ def CC_ARM_APCS_GHC : CallingConv<[
def CC_ARM_AAPCS_Common : CallingConv<[
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// i64/f64 is passed in even pairs of GPRs
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
@@ -125,6 +107,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
]>;
def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
]>;
@@ -181,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8))>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
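These CalleeSavedRegs records replace the hand-maintained arrays deleted from getCalleeSavedRegs() above; tablegen expands the add/sub/sequence operators into the save lists and call-preserved masks the C++ code now returns. A stand-alone sketch of those set operations on plain register names, reproducing the intended CSR_iOS ordering (LR, then R7-R4 to match the fixed spill area, then the rest of CSR_AAPCS without R9); the keep-first-occurrence behaviour of 'add' is an assumption about the expansion, used only for illustration:

// Self-contained sketch of the set operations in the CSR definitions:
// CSR_iOS = add(LR, R7, R6, R5, R4, sub(CSR_AAPCS, R9)). Register names are
// plain strings here; tablegen works on register records and emits uint16_t
// save lists plus uint32_t masks.
#include <algorithm>
#include <initializer_list>
#include <iostream>
#include <string>
#include <vector>

using RegList = std::vector<std::string>;

// 'add': concatenate operands, keeping only the first occurrence of a register.
RegList add(std::initializer_list<RegList> Parts) {
  RegList Out;
  for (const RegList &P : Parts)
    for (const std::string &R : P)
      if (std::find(Out.begin(), Out.end(), R) == Out.end())
        Out.push_back(R);
  return Out;
}

// 'sub': remove the given registers from a list.
RegList sub(RegList L, std::initializer_list<std::string> Remove) {
  for (const std::string &R : Remove)
    L.erase(std::remove(L.begin(), L.end(), R), L.end());
  return L;
}

int main() {
  RegList CSR_AAPCS = add({{"LR", "R11", "R10", "R9", "R8",
                            "R7", "R6", "R5", "R4"},
                           {"D15", "D14", "D13", "D12",
                            "D11", "D10", "D9", "D8"}});
  // iOS: R9 is not callee-saved, and R7-R4 lead so they sit next to LR in the
  // fixed spill area.
  RegList CSR_iOS = add({{"LR", "R7", "R6", "R5", "R4"},
                         sub(CSR_AAPCS, {"R9"})});
  for (const std::string &R : CSR_iOS)
    std::cout << R << ' ';   // LR R7 R6 R5 R4 R11 R10 R8 D15 ... D8
  std::cout << '\n';
}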
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 4148d4ab10e9..32ef345c058f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
@@ -46,7 +46,7 @@ namespace {
class ARMCodeEmitter : public MachineFunctionPass {
ARMJITInfo *JTI;
- const ARMInstrInfo *II;
+ const ARMBaseInstrInfo *II;
const TargetData *TD;
const ARMSubtarget *Subtarget;
TargetMachine &TM;
@@ -66,7 +66,7 @@ namespace {
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
: MachineFunctionPass(ID), JTI(0),
- II((const ARMInstrInfo *)tm.getInstrInfo()),
+ II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
@@ -74,7 +74,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
bool runOnMachineFunction(MachineFunction &MF);
@@ -189,6 +189,8 @@ namespace {
unsigned Op) const { return 0; }
unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
@@ -366,9 +368,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
- II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
- TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
+ II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
+ TD = MF.getTarget().getTargetData();
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -386,7 +388,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -406,7 +408,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
@@ -532,7 +533,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
- break;
}
case ARMII::MiscFrm:
if (MI.getOpcode() == ARM::LEApcrelJT) {
@@ -541,7 +541,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
break;
}
llvm_unreachable("Unhandled instruction encoding!");
- break;
case ARMII::Pseudo:
emitPseudoInstruction(MI);
break;
@@ -837,9 +836,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL:
- case ARM::BXr9_CALL:
- case ARM::BMOVPCRXr9_CALL: {
+ case ARM::BMOVPCRX_CALL: {
// First emit mov lr, pc
unsigned Binary = 0x01a0e00f;
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a4134c704..fc35c7cb0200 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,16 +16,17 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,44 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
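UnknownPadding and WorstCaseAlign budget for padding when only the low KnownBits bits of an offset are trusted: the hidden high bits could force up to (1 << LogAlign) - (1 << KnownBits) extra bytes before an aligned block, so that slack is added before rounding up. A stand-alone copy of the math with a worked example (RoundUpToAlignment is re-implemented locally so the snippet compiles on its own):

// Stand-alone copy of the padding math above, with a tiny driver.
#include <iostream>

static unsigned roundUpToAlignment(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;   // local stand-in for LLVM's helper
}

static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

static unsigned worstCaseAlign(unsigned Offset, unsigned LogAlign,
                               unsigned KnownBits) {
  Offset += unknownPadding(LogAlign, KnownBits);        // worst-case slack
  return roundUpToAlignment(Offset, 1u << LogAlign);    // then align
}

int main() {
  // Offset 0x106 with only the low 1 bit known, target alignment 4 bytes:
  // up to 2 bytes of hidden padding may appear, so budget for offset 0x108.
  std::cout << std::hex
            << worstCaseAlign(0x106, /*LogAlign=*/2, /*KnownBits=*/1) << '\n';
}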
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +103,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
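postOffset() combines the block's conservative size, any .align emitted by its terminator (PostAlign), and the alignment the next block requests, and because only internalKnownBits() low bits of the end offset are certain it applies the worst-case padding rule rather than a plain round-up. A reduced stand-alone version of the struct with one worked case (the sizes and alignment values are illustrative):

// Reduced stand-alone BasicBlockInfo with the postOffset logic above.
#include <algorithm>
#include <iostream>

static unsigned worstCaseAlign(unsigned Offset, unsigned LogAlign,
                               unsigned KnownBits) {
  if (KnownBits < LogAlign)
    Offset += (1u << LogAlign) - (1u << KnownBits);     // worst-case padding
  return (Offset + (1u << LogAlign) - 1) & ~((1u << LogAlign) - 1);
}

struct BasicBlockInfo {
  unsigned Offset = 0;     // aligned start of the block
  unsigned Size = 0;       // conservative size in bytes
  unsigned KnownBits = 0;  // trusted low bits of Offset
  unsigned Unalign = 0;    // non-zero if Size is only an upper bound
  unsigned PostAlign = 0;  // log2 alignment forced by the terminator

  unsigned internalKnownBits() const { return Unalign ? Unalign : KnownBits; }

  unsigned postOffset(unsigned LogAlign = 0) const {
    unsigned PO = Offset + Size;
    unsigned LA = std::max(PostAlign, LogAlign);
    if (!LA)
      return PO;
    return worstCaseAlign(PO, LA, internalKnownBits());
  }
};

int main() {
  // A 10-byte block starting at a 4-byte aligned offset, followed by a
  // constant island that wants 8-byte alignment (LogAlign = 3).
  BasicBlockInfo BBI;
  BBI.Offset = 0x40;
  BBI.Size = 10;
  BBI.KnownBits = 2;
  // Prints 50 (hex): 0x4a plus up to 4 hidden padding bytes, rounded up to 8.
  std::cout << std::hex << BBI.postOffset(/*LogAlign=*/3) << '\n';
}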
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -102,14 +195,24 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
+ private:
unsigned MaxDisp;
+ public:
bool NegOk;
bool IsSoImm;
+ bool KnownAlignment;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+ KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ /// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
+ unsigned getMaxDisp() const {
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
+ }
};
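getMaxDisp() deliberately under-reports the reachable range: a fixed 2-byte safety margin, plus another 2 bytes while the user's alignment is still unknown. A trivial stand-alone restatement (the 1020-byte budget is only an example figure):

// The effective displacement budget above, as a free function.
#include <iostream>

unsigned maxDisp(unsigned MaxDisp, bool KnownAlignment) {
  return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
}

int main() {
  std::cout << maxDisp(1020, true)  << '\n'; // 1018
  std::cout << maxDisp(1020, false) << '\n'; // 1016
}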
/// CPUsers - Keep track of all of the machine instructions that use various
@@ -162,10 +265,9 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
- const ARMInstrInfo *TII;
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -182,85 +284,100 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
- MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
- void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
- bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
- int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
- bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
- void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
- void RemoveDeadCPEMI(MachineInstr *CPEMI);
- bool RemoveUnusedCPEntries();
- bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
- MachineInstr *CPEMI, unsigned Disp, bool NegOk,
- bool DoDump = false);
- bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
- bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
- MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+ bool undoLRSpillRestore();
+ bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
+ bool optimizeThumb2Instructions();
+ bool optimizeThumb2Branches();
+ bool reorderThumb2JumpTables();
+ bool optimizeThumb2JumpTables();
+ MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- unsigned GetOffsetOf(MachineInstr *MI) const;
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
- unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
- unsigned CPEOffset = GetOffsetOf(U.CPEMI);
- unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
- UserOffset - CPEOffset;
- assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
}
#endif
}
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +386,41 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ scanFunctionJumpTables();
+ MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +428,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,34 +442,36 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ initializeFunctionInfo(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
/// Remove dead constant pool entries.
- MadeChange |= RemoveUnusedCPEntries();
+ MadeChange |= removeUnusedCPEntries();
// Iteratively place constant pool entries and fix up branches until there
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= handleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
- llvm_unreachable("Constant Island pass failed to converge!");
+ report_fatal_error("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
- llvm_unreachable("Branch Fix Up pass failed to converge!");
+ report_fatal_error("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
if (!CPChange && !BRChange)
@@ -358,15 +481,15 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= optimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
- MadeChange |= UndoLRSpillRestore();
+ MadeChange |= undoLRSpillRestore();
// Save the mapping between original and cloned constpool entries.
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
@@ -376,10 +499,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -390,39 +512,68 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// doInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
}
+ DEBUG(BB->dump());
}
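doInitialPlacement now bucket-sorts the constant pool entries by alignment as they are created: InsPoint keeps, for each log2 alignment, the position before which the next entry of that alignment must land, so the island block ends up ordered by descending alignment and every entry stays naturally aligned as long as the block itself is. A stand-alone sketch of the same insertion-point bookkeeping over a std::list (the entry count and alignments are made up):

// Sketch of the descending-alignment bucket sort used for constant island
// entries: one insertion point per log2 alignment, updated as entries land.
#include <iostream>
#include <list>
#include <vector>

struct Entry { unsigned Index; unsigned LogAlign; };

int main() {
  const unsigned MaxAlign = 3;                 // up to 8-byte aligned entries
  std::list<Entry> Island;
  // InsPoint[a] is where the next entry with alignment 2^a is inserted.
  std::vector<std::list<Entry>::iterator> InsPoint(MaxAlign + 1, Island.end());

  unsigned Aligns[] = {2, 3, 2, 0, 3};         // log2 alignment per entry
  for (unsigned i = 0; i != 5; ++i) {
    unsigned LA = Aligns[i];
    auto InsAt = InsPoint[LA];
    auto It = Island.insert(InsAt, Entry{i, LA});
    // Entries with a higher alignment must stay in front of this one.
    for (unsigned a = LA + 1; a <= MaxAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = It;
  }

  for (const Entry &E : Island)               // prints 1 4 0 2 3:
    std::cout << E.Index << ' ';              // 8-byte entries first, then 4, then 1
  std::cout << '\n';
}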
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,41 +609,61 @@ ARMConstantIslands::CPEntry
return NULL;
}
-/// JumpTableFunctionScan - Do a scan of the function, building up
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::scanFunctionJumpTables() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
-/// InitialFunctionScan - Do the initial scan of the function, building up
+/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +672,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +686,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -589,7 +745,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
llvm_unreachable("Unknown addressing mode for CP reference!");
- break;
// Taking the address of a CP entry.
case ARM::LEApcrel:
@@ -647,45 +802,53 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(I))
+ BBI.Unalign = 1;
+ }
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
-/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
-unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- if (&*I == MI) return Offset;
Offset += TII->GetInstSizeInBytes(I);
}
+ return Offset;
}
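
As a quick illustration of getOffsetOf(): the result is the cached block offset plus the sizes of the instructions that precede MI in its block. In this sketch, BlockOffset and InstrSizes are hypothetical stand-ins for BBInfo[MBB->getNumber()].Offset and for walking the MachineBasicBlock.

#include <cstdio>
#include <vector>

unsigned offsetOf(unsigned BlockOffset, const std::vector<unsigned> &InstrSizes,
                  unsigned Index) {
  unsigned Offset = BlockOffset;
  for (unsigned i = 0; i != Index; ++i) // sum the instructions before Index
    Offset += InstrSizes[i];
  return Offset;
}

int main() {
  std::vector<unsigned> Sizes = {4, 2, 4, 2};
  std::printf("%#x\n", offsetOf(0x100, Sizes, 2)); // 0x106
}
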
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
@@ -695,19 +858,16 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
return LHS->getNumber() < RHS->getNumber();
}
-/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
-void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -721,15 +881,14 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
-MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,31 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
// Update internal data structures to account for the newly inserted MBB.
- // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // This is almost the same as updateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,86 +934,56 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ computeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ computeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ adjustBBOffsetsAfter(OrigBB);
return NewBB;
}
-/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// getUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+ const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+ unsigned KnownBits = BBI.internalKnownBits();
+
+ // The value read from PC is offset from the actual instruction address.
+ UserOffset += (isThumb ? 4 : 8);
+
+ // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+ // Make sure U.getMaxDisp() returns a constrained range.
+ U.KnownAlignment = (KnownBits >= 2);
+
+ // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // For unknown alignments, getMaxDisp() constrains the range instead.
+ if (isThumb && U.KnownAlignment)
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
+
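
A standalone sketch of the adjustment getUserOffset() applies, assuming the usual ARM/Thumb PC behaviour: the hardware reads PC as the instruction address plus 8 (ARM) or 4 (Thumb), and Thumb pc-relative addressing aligns that value down to a word boundary. The KnownAlignment flag corresponds to knowing at least two low bits of the offset (no inline asm of uncertain size earlier in the block).

#include <cstdio>

unsigned userOffset(unsigned InsnOffset, bool IsThumb, bool KnownAlignment) {
  unsigned Offset = InsnOffset + (IsThumb ? 4 : 8); // PC bias
  if (IsThumb && KnownAlignment)
    Offset &= ~3u;                                  // hardware rounds pc down
  return Offset;
}

int main() {
  // A Thumb user at offset 0x102 sees pc = 0x106, rounded down to 0x104.
  std::printf("%#x\n", userOffset(0x102, true, true));  // 0x104
  // With unknown alignment the rounding is skipped; getMaxDisp() compensates.
  std::printf("%#x\n", userOffset(0x102, true, false)); // 0x106
}
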
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
-bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+/// UserOffset is computed by getUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
+bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
unsigned TrialOffset, unsigned MaxDisp,
bool NegativeOK, bool IsSoImm) {
- // On Thumb offsets==2 mod 4 are rounded down by the hardware for
- // purposes of the displacement computation; compensate for that here.
- // Effectively, the valid range of displacements is 2 bytes smaller for such
- // references.
- unsigned TotalAdj = 0;
- if (isThumb && UserOffset%4 !=0) {
- UserOffset -= 2;
- TotalAdj = 2;
- }
- // CPEs will be rounded up to a multiple of 4.
- if (isThumb && TrialOffset%4 != 0) {
- TrialOffset += 2;
- TotalAdj += 2;
- }
-
- // In Thumb2 mode, later branch adjustments can shift instructions up and
- // cause alignment change. In the worst case scenario this can cause the
- // user's effective address to be subtracted by 2 and the CPE's address to
- // be plus 2.
- if (isThumb2 && TotalAdj != 4)
- MaxDisp -= (4 - TotalAdj);
-
if (UserOffset <= TrialOffset) {
// User before the Trial.
if (TrialOffset - UserOffset <= MaxDisp)
@@ -880,40 +997,71 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
return false;
}
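
The range test itself reduces to the small symmetric check below. This is a sketch, assuming the branch elided from the hunk mirrors the forward case with NegativeOK gating the backward direction; the 1020-byte, forward-only example models a Thumb1 pc-relative load.

#include <cstdio>

bool offsetInRange(unsigned UserOffset, unsigned TrialOffset,
                   unsigned MaxDisp, bool NegativeOK) {
  if (UserOffset <= TrialOffset)                 // entry placed after the user
    return TrialOffset - UserOffset <= MaxDisp;
  return NegativeOK && UserOffset - TrialOffset <= MaxDisp; // entry before user
}

int main() {
  std::printf("%d\n", offsetInRange(0x100, 0x4f0, 1020, false)); // 1: in range
  std::printf("%d\n", offsetInRange(0x100, 0x0f0, 1020, false)); // 0: backwards
}
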
-/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
-bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
}
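
The Growth computation above can be sketched on its own: a new entry only grows the function by the part that does not fit into the alignment padding already present before the next block, plus whatever padding is then needed to realign that block. Alignments are passed here in bytes rather than as log2 values, and offsetToAlignment() is a local stand-in for OffsetToAlignment().

#include <cstdio>

unsigned offsetToAlignment(unsigned Value, unsigned Align) {
  return (Align - Value % Align) % Align;   // bytes needed to reach Align
}

unsigned cpeGrowth(unsigned CPEOffset, unsigned CPESize,
                   unsigned NextBlockOffset, unsigned NextBlockAlign) {
  unsigned CPEEnd = CPEOffset + CPESize;
  if (CPEEnd <= NextBlockOffset)
    return 0;                               // entry hides in existing padding
  return (CPEEnd - NextBlockOffset) +
         offsetToAlignment(CPEEnd, NextBlockAlign);
}

int main() {
  std::printf("%u\n", cpeGrowth(0x1f8, 8, 0x200, 4)); // 0: fits in padding
  std::printf("%u\n", cpeGrowth(0x1fc, 8, 0x200, 4)); // 4: one extra word
}
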
-/// CPEIsInRange - Returns true if the distance between specific MI and
+/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
-bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
- unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}
#ifndef NDEBUG
@@ -933,69 +1081,40 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
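
A simplified model of the propagation done by adjustBBOffsetsAfter(): each following block starts at the end of its layout predecessor, rounded up to the block's own alignment. The real pass also carries KnownBits so that worst-case padding around inline asm is modelled; that part is omitted from this sketch.

#include <cstdio>
#include <vector>

struct BlockInfo { unsigned Offset; unsigned Size; unsigned LogAlign; };

void adjustOffsetsAfter(std::vector<BlockInfo> &BBInfo, unsigned BBNum) {
  for (unsigned i = BBNum + 1, e = (unsigned)BBInfo.size(); i < e; ++i) {
    unsigned Align = 1u << BBInfo[i].LogAlign;
    unsigned End = BBInfo[i - 1].Offset + BBInfo[i - 1].Size;
    unsigned Offset = (End + Align - 1) & ~(Align - 1); // round up to alignment
    if (i > BBNum + 2 && BBInfo[i].Offset == Offset)
      break;                      // nothing past this point can have moved
    BBInfo[i].Offset = Offset;
  }
}

int main() {
  std::vector<BlockInfo> BBInfo = {{0, 10, 0}, {12, 6, 2}, {20, 8, 2}};
  BBInfo[0].Size += 4;            // block 0 grew by 4 bytes
  adjustOffsetsAfter(BBInfo, 0);
  for (const BlockInfo &B : BBInfo)
    std::printf("%u ", B.Offset); // 0 16 24
  std::printf("\n");
}
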
-/// DecrementOldEntry - find the constant pool entry with index CPI
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount. If the refcount
/// becomes 0 remove the entry and instruction. Returns true if we removed
/// the entry, false if we didn't.
-bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
// Find the old entry. Eliminate it if it is no longer used.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
- RemoveDeadCPEMI(CPEMI);
+ removeDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
--NumCPEs;
return true;
@@ -1009,14 +1128,15 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
-int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
{
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
// Check to see if the CPE is already in-range.
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1030,8 +1150,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Removing CPEs can leave empty entries, skip
if (CPEs[i].CPEMI == NULL)
continue;
- if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1045,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
CPEs[i].RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
- return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
}
}
return 0;
@@ -1066,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
return ((1<<23)-1)*4;
}
-/// LookForWater - Look for an existing entry in the WaterList in which
+/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
/// is set to the WaterList entry. For Thumb, prefer water that will not
@@ -1074,15 +1195,14 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
-bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
water_iterator &WaterIter) {
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,166 +1212,186 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
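
The selection policy above, stripped of the LLVM types: walk the water list from the end (highest addresses first), remember the candidate that needs the least padding, and stop early on a perfect fit. InRange is a hypothetical callback standing in for isWaterInRange() plus the high-water-mark test.

#include <cstdio>
#include <functional>
#include <vector>

// Returns the index of the best water block, or -1 if none is in range.
int findWater(const std::vector<int> &Water,
              const std::function<bool(int, unsigned &)> &InRange) {
  unsigned BestGrowth = ~0u;
  int Best = -1;
  for (int i = (int)Water.size() - 1; i >= 0; --i) {
    unsigned Growth;
    if (InRange(Water[i], Growth) && Growth < BestGrowth) {
      BestGrowth = Growth;
      Best = i;
      if (Growth == 0)
        break;                    // perfect: the entry hides in existing padding
    }
  }
  return Best;
}

int main() {
  std::vector<int> Water = {1, 4, 7}; // block numbers with water after them
  int Best = findWater(Water, [](int BB, unsigned &Growth) {
    Growth = (BB == 4) ? 0 : 2;       // pretend only BB#4 needs no padding
    return true;
  });
  std::printf("best water after BB#%d\n", Water[Best]); // BB#4
}
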
-/// CreateNewWater - No existing WaterList entry will work for
+/// createNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct. Otherwise the block is split to create a hole with an
/// unconditional branch around it. In either case NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
-void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned UserOffset,
MachineBasicBlock *&NewMBB) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
- // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
- // inside OffsetIsInRange.
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ // Thumb2, 2 on Thumb1.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ --MI;
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = splitBlockBeforeInstr(MI);
}
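
The split-point arithmetic in the middle of createNewWater() can be shown on its own: start from the farthest address the user can still reach, round down to the function alignment, then reserve room for worst-case padding and for the branch that will jump around the new island. The UPad argument plays the role of UnknownPadding(); the numbers in main() are illustrative, not taken from a real function.

#include <cstdio>

unsigned baseInsertOffset(unsigned UserOffset, unsigned MaxDisp,
                          unsigned LogAlign, unsigned UPad) {
  unsigned Offset = UserOffset + MaxDisp; // farthest reachable address
  Offset &= ~((1u << LogAlign) - 1);      // round down to the function alignment
  Offset -= UPad;                         // worst-case unknown padding
  Offset -= 4;                            // the unconditional branch we will add
  return Offset;
}

int main() {
  // A 4095-byte range, 4-byte function alignment, no inline asm uncertainty.
  std::printf("%#x\n", baseInsertOffset(0x104, 4095, 2, 0)); // 0x10fc
}
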
-/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned Size = CPEMI->getOperand(2).getImm();
- // Compute this only once, it's expensive. The 4 or 8 is the value the
- // hardware keeps in the PC.
- unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
// See if the current entry is within range, or there is a clone of it
// in range.
- int result = LookForExistingCPEntry(U, UserOffset);
+ int result = findInRangeCPEntry(U, UserOffset);
if (result==1) return false;
else if (result==2) return true;
@@ -1260,11 +1400,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
- if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,10 +1419,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
- CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
- // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
// called while handling branches so that the water will be seen on the
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
@@ -1304,13 +1444,13 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
- UpdateForInsertedWaterBlock(NewIsland);
+ updateForInsertedWaterBlock(NewIsland);
// Decrement the old entry, and remove it if refcount becomes 0.
- DecrementOldEntry(CPI, CPEMI);
+ decrementCPEReferenceCount(CPI, CPEMI);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
@@ -1320,13 +1460,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,31 +1474,30 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
-/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
/// sizes and offsets of impacted basic blocks.
-void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1367,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
// FIXME: remove the empty blocks after all the work is done?
}
-/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
/// are zero.
-bool ARMConstantIslands::RemoveUnusedCPEntries() {
+bool ARMConstantIslands::removeUnusedCPEntries() {
unsigned MadeChange = false;
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
std::vector<CPEntry> &CPEs = CPEntries[i];
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
- RemoveDeadCPEMI(CPEs[j].CPEMI);
+ removeDeadCPEMI(CPEs[j].CPEMI);
CPEs[j].CPEMI = NULL;
MadeChange = true;
}
@@ -1384,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() {
return MadeChange;
}
-/// BBIsInRange - Returns true if the distance between specific MI and
+/// isBBInRange - Returns true if the distance between specific MI and
/// specific BB can fit in MI's displacement field.
-bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
- unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
- << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
<< " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
if (BrOffset <= DestOffset) {
@@ -1409,50 +1547,50 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
return false;
}
-/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
// Check to see if the DestBB is already in-range.
- if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
}
-/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
/// too far away to fit in its displacement field. If the LR register has been
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
- llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+ llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
-/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1486,8 +1624,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// bne L2
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1498,19 +1636,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
}
if (NeedSplit) {
- SplitBlockBeforeInstr(MI);
+ splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,30 +1655,27 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ adjustBBOffsetsAfter(MBB);
return true;
}
-/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
/// LR / restores LR to pc. FIXME: This is done here because it's only possible
/// to do this if tBfar is not used.
-bool ARMConstantIslands::UndoLRSpillRestore() {
+bool ARMConstantIslands::undoLRSpillRestore() {
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
MachineInstr *MI = PushPopMIs[i];
@@ -1561,7 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+bool
+ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1592,25 +1744,31 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (!NewOpc)
continue;
- unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned UserOffset = getUserOffset(U);
unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+
+ // Be conservative with inline asm.
+ if (!U.KnownAlignment)
+ MaxOffs -= 2;
+
// FIXME: Check if offset is multiple of scale if scale is not 4.
- if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= optimizeThumb2Branches();
+ MadeChange |= optimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1636,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1650,9 +1809,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (Opcode != ARM::tBcc)
continue;
+ // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
+ // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR))
+ continue;
+
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
@@ -1662,27 +1826,28 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
- unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1694,14 +1859,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
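
The cbz/cbnz rewrite above hinges on one small range test, sketched here: the instructions only encode a short forward displacement (at most 126 bytes), the branch offset gets the usual +4 Thumb PC bias, and 2 is subtracted because deleting the preceding tCMPi8 moves everything after it up by one halfword.

#include <cstdio>

bool cbzInRange(unsigned BranchOffset, unsigned DestOffset) {
  unsigned BrOffset = BranchOffset + 4 - 2; // PC bias, minus the folded compare
  return BrOffset < DestOffset && DestOffset - BrOffset <= 126;
}

int main() {
  std::printf("%d\n", cbzInRange(0x100, 0x140)); // 1: 62 bytes forward
  std::printf("%d\n", cbzInRange(0x100, 0x200)); // 0: too far for cbz/cbnz
}
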
-/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,18 +1874,18 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
bool ByteOk = true;
bool HalfWordOk = true;
- unsigned JTOffset = GetOffsetOf(MI) + 4;
+ unsigned JTOffset = getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1791,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
+ << " lea: " << *LeaMI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(MI->getOperand(JTOpIdx+1).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
// is 2-byte aligned. For now, asm printer will fix it up.
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
@@ -1808,8 +1976,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ adjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1819,12 +1987,12 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
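
The tbb/tbh decision reduces to the per-destination distance test sketched below: tbb stores byte offsets and tbh halfword offsets, both scaled by 2 and forward only, so every destination must lie within 510 bytes after the jump to use the byte form. The 131070-byte halfword limit is the natural 16-bit analogue of the byte limit shown in the hunk and is an assumption here, since that check falls outside the visible context.

#include <cstdio>
#include <vector>

enum class JTKind { Byte, HalfWord, None };

JTKind classifyJumpTable(unsigned JTOffset,
                         const std::vector<unsigned> &DestOffsets) {
  bool ByteOk = true, HalfWordOk = true;
  for (unsigned Dst : DestOffsets) {
    if (Dst < JTOffset)
      return JTKind::None;                       // backward edges are not ok
    unsigned Delta = Dst - JTOffset;
    if (Delta > ((1u << 8) - 1) * 2)  ByteOk = false;
    if (Delta > ((1u << 16) - 1) * 2) HalfWordOk = false;
  }
  return ByteOk ? JTKind::Byte : HalfWordOk ? JTKind::HalfWord : JTKind::None;
}

int main() {
  std::vector<unsigned> Dests = {0x120, 0x300, 0x5f0};
  JTKind K = classifyJumpTable(/*JTOffset=*/0x104, Dests);
  std::printf("%s\n", K == JTKind::Byte ? "tbb"
                      : K == JTKind::HalfWord ? "tbh" : "neither"); // tbh
}
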
-/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::reorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +2000,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1850,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- AdjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(MBB, MI->getParent());
if (NewBB)
MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
MadeChange = true;
@@ -1862,10 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
}
MachineBasicBlock *ARMConstantIslands::
-AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
-{
- MachineFunction &MF = *BB->getParent();
-
+adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +2047,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +2072,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index aadfd4779db3..fa3226e37eb9 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {}
const char *ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
case ARMCP::no_modifier: return "none";
@@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const {
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
+ llvm_unreachable("Unknown modifier!");
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- assert(false && "Shouldn't be calling this directly!");
- return -1;
+ llvm_unreachable("Shouldn't be calling this directly!");
}
void
@@ -315,5 +314,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
}
void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
ARMConstantPoolValue::print(O);
}
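The two hunks above apply a recurring cleanup in this import: an in-switch `default: llvm_unreachable(...)` becomes an `llvm_unreachable` placed after a fully covered switch (so -Wswitch can flag newly added enumerators), and `assert(false && ...); return -1;` becomes a bare `llvm_unreachable`, whose noreturn attribute makes the dead return value unnecessary. A minimal standalone sketch of the pattern, with llvm_unreachable modeled by a small noreturn helper rather than the LLVM macro:

#include <cstdio>
#include <cstdlib>

// Stand-in for llvm_unreachable: noreturn, so no dead `return` is needed after it.
[[noreturn]] static void unreachableMsg(const char *Msg) {
  std::fprintf(stderr, "UNREACHABLE executed: %s\n", Msg);
  std::abort();
}

enum class Modifier { None, TlsGd, GotTpoff };

// No `default:` case: every enumerator is handled explicitly, and the call
// after the switch only fires for corrupt values.
const char *modifierText(Modifier M) {
  switch (M) {
  case Modifier::None:     return "none";
  case Modifier::TlsGd:    return "tlsgd";
  case Modifier::GotTpoff: return "gottpoff";
  }
  unreachableMsg("Unknown modifier!");
}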
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 0d0def32b7d8..6b98d446b003 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index 51e68b4553ff..f671317d0948 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
case ARM::reloc_arm_machine_cp_entry:
case ARM::reloc_arm_jt_base:
case ARM::reloc_arm_pic_jt:
- assert(0 && "unsupported ARM relocation type"); break;
-
- case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
- case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
- case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ llvm_unreachable("unsupported ARM relocation type");
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC;
default:
- llvm_unreachable("unknown ARM relocation type"); break;
+ llvm_unreachable("unknown ARM relocation type");
}
- return 0;
}
long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
- assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not "
+ "implemented");
}
unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented");
}
bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
- return 1;
+ llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented");
}
unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
index 1c4e5329ac61..6a84f8ac4235 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.h
+++ b/lib/Target/ARM/ARMELFWriterInfo.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetELFWriterInfo.h"
namespace llvm {
+ class TargetMachine;
class ARMELFWriterInfo : public TargetELFWriterInfo {
public:
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7872cb90f4e7..5fc0360528cc 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1,4 +1,4 @@
-//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,7 +19,6 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -61,7 +60,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -99,13 +98,20 @@ namespace {
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
- unsigned PseudoOpc;
- unsigned RealOpc;
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
bool IsLoad;
- bool HasWriteBack;
+ bool isUpdating;
+ bool hasWritebackOperand;
NEONRegSpacing RegSpacing;
unsigned char NumRegs; // D registers loaded or stored
unsigned char RegElts; // elements per D register; used for lane ops
+ // FIXME: Temporary flag to denote whether the real instruction takes
+ // a single register (like the encoding) or all of the registers in
+ // the list (like the asm syntax and the isel DAG). When all definitions
+ // are converted to take only the single encoded register, this will
+ // go away.
+ bool copyAllListRegs;
// Comparison methods for binary search of the table.
bool operator<(const NEONLdStTableEntry &TE) const {
@@ -122,243 +128,203 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
-{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
-{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
-{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
-{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
-
-{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
-{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
-{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
-{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
-{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
-
-{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
-{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
-
-{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
-{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
-{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
-
-{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
-
-{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
-{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
-{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
-
-{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
-{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
-
-{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
-{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
-{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
-
-{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
-{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
-{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
-
-{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
-{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
-
-{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
-{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
-{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
-{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
-{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
-
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
-{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
-{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
-
-{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
-
-{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
-{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
-{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
-
-{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
-{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
-{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
-
-{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
-{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
-{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
};
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- unsigned NumEntries = array_lengthof(NEONLdStTable);
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
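LookupNEONLdSt, introduced by its doc comment just above, depends on NEONLdStTable staying sorted by PseudoOpc so a binary search can find the entry for a pseudo opcode (the #ifndef NDEBUG block verifies the ordering). A standalone sketch of that lookup pattern, using hypothetical opcode values rather than the real ARM:: enums:

#include <algorithm>
#include <cstdint>
#include <iterator>

struct LdStEntry {
  uint16_t PseudoOpc;   // key: the table must stay sorted by this field
  uint16_t RealOpc;
  bool operator<(const LdStEntry &E) const { return PseudoOpc < E.PseudoOpc; }
};

static const LdStEntry Table[] = {
  {100, 10}, {120, 12}, {150, 15},   // hypothetical opcodes, kept sorted
};

static const LdStEntry *lookup(uint16_t PseudoOpc) {
  LdStEntry Key = {PseudoOpc, 0};
  const LdStEntry *I =
      std::lower_bound(std::begin(Table), std::end(Table), Key);
  return (I != std::end(Table) && I->PseudoOpc == PseudoOpc) ? I : nullptr;
}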
@@ -422,21 +388,22 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 2)
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
@@ -481,24 +448,26 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
+ MIB.addReg(D0);
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1);
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2);
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3);
// Copy the predicate operands.
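Both ExpandVLD and ExpandVST above call GetDSubRegs to split the pseudo's Q/QQ/QQQQ super-register into individual D registers, and then use the new copyAllListRegs flag to decide how many of them become explicit operands on the real instruction. A rough sketch of what a spacing value like NEONRegSpacing implies for that split, assuming SingleSpc means consecutive D registers and the even/odd double-spaced forms step by two D registers at a time:

#include <array>
#include <cassert>

enum RegSpacing { SingleSpc, EvenDblSpc, OddDblSpc };

// Sketch only: map a base D-register number to the (up to four) D registers
// in the list, under the spacing assumption stated above.
static std::array<unsigned, 4> dRegsFor(unsigned BaseD, RegSpacing Spc,
                                        unsigned NumRegs) {
  assert(NumRegs >= 1 && NumRegs <= 4 && "NEON lists hold 1-4 D registers");
  unsigned Start = BaseD + (Spc == OddDblSpc ? 1 : 0);
  unsigned Step = (Spc == SingleSpc) ? 1 : 2;
  std::array<unsigned, 4> Regs = {};
  for (unsigned I = 0; I != NumRegs; ++I)
    Regs[I] = Start + I * Step;
  return Regs;
}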
@@ -558,14 +527,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
}
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// Grab the super-register source.
@@ -599,13 +568,15 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -621,11 +592,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -645,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
@@ -809,7 +776,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup: {
+ case ARM::Int_eh_sjlj_dispatchsetup:
+ case ARM::Int_eh_sjlj_dispatchsetup_nofp:
+ case ARM::tInt_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
@@ -824,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
- llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
} else if (AFI->isThumbFunction()) {
- llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *TII, RI);
+ emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
} else {
- llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0,
- *TII);
+ emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
@@ -996,6 +965,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1026,6 +996,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1057,26 +1028,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1084,7 +1044,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -1101,7 +1060,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -1111,18 +1069,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
@@ -1138,26 +1084,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandVLD(MBBI);
return true;
- case ARM::VST1q8Pseudo:
- case ARM::VST1q16Pseudo:
- case ARM::VST1q32Pseudo:
- case ARM::VST1q64Pseudo:
- case ARM::VST1q8Pseudo_UPD:
- case ARM::VST1q16Pseudo_UPD:
- case ARM::VST1q32Pseudo_UPD:
- case ARM::VST1q64Pseudo_UPD:
- case ARM::VST2d8Pseudo:
- case ARM::VST2d16Pseudo:
- case ARM::VST2d32Pseudo:
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1165,7 +1100,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1182,7 +1118,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1270,15 +1207,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
-
- return false;
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index dc8e54ddf957..2e1eaca85b5b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,7 +16,6 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
@@ -37,7 +36,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -90,7 +88,7 @@ class ARMFastISel : public FastISel {
ARMFunctionInfo *AFI;
// Convenience variables to avoid some queries.
- bool isThumb;
+ bool isThumb2;
LLVMContext *Context;
public:
@@ -101,7 +99,7 @@ class ARMFastISel : public FastISel {
TLI(*TM.getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
- isThumb = AFI->isThumbFunction();
+ isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
}
@@ -148,6 +146,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
#include "ARMGenFastISel.inc"
@@ -156,27 +156,40 @@ class ARMFastISel : public FastISel {
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
- bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
- bool SelectSIToFP(const Instruction *I);
- bool SelectFPToSI(const Instruction *I);
- bool SelectSDiv(const Instruction *I);
- bool SelectSRem(const Instruction *I);
- bool SelectCall(const Instruction *I);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectIToFP(const Instruction *I, bool isSigned);
+ bool SelectFPToI(const Instruction *I, bool isSigned);
+ bool SelectDiv(const Instruction *I, bool isSigned);
+ bool SelectRem(const Instruction *I, bool isSigned);
+ bool SelectCall(const Instruction *I, const char *IntrMemName);
+ bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
- bool SelectIntCast(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
+
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT);
+ void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ bool ARMIsMemCpySmall(uint64_t Len);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
+ unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
@@ -186,8 +199,6 @@ class ARMFastISel : public FastISel {
// Call handling routines.
private:
- bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
- unsigned &ResultReg);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
@@ -208,7 +219,7 @@ class ARMFastISel : public FastISel {
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags);
+ unsigned Flags, bool useAM3);
};
} // end anonymous namespace
@@ -219,8 +230,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -290,10 +300,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -310,11 +320,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
@@ -333,12 +343,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addReg(Op2, Op2IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -357,11 +367,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
@@ -379,11 +389,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
@@ -402,12 +412,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -425,10 +435,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -444,10 +454,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm1).addImm(Imm2));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm1).addImm(Imm2));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -464,9 +474,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
+
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
return ResultReg;
}
@@ -477,7 +488,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVRS), MoveReg)
+ TII.get(ARM::VMOVSR), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
@@ -487,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVSR), MoveReg)
+ TII.get(ARM::VMOVRS), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
@@ -541,22 +552,42 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return false;
// If we can do this in a single instruction without a constant pool entry
// do so now.
const ConstantInt *CI = cast<ConstantInt>(C);
- if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
- unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), DestReg)
- .addImm(CI->getSExtValue()));
- return DestReg;
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
}
+ // Use MVN to emit negative constants.
+ if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+ unsigned Imm = (unsigned)~(CI->getSExtValue());
+ bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ if (UseImm) {
+ unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ImmReg)
+ .addImm(Imm));
+ return ImmReg;
+ }
+ }
+
+ // Load from constant pool. For now 32-bit only.
+ if (VT != MVT::i32)
+ return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
@@ -565,7 +596,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
- if (isThumb)
+ if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg)
.addConstantPoolIndex(Idx));
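
A standalone, illustrative C++ sketch of the decision the hunks above implement for the A32 case: try MOVi16 (movw) when the constant fits in 16 bits, try MVN when the bitwise complement is an ARM "modified immediate" (an 8-bit value rotated right by an even amount), and otherwise fall back to a constant-pool load. The isSOImm helper below only approximates ARM_AM::getSOImmVal and is not the LLVM implementation.

    #include <cstdint>
    #include <cstdio>

    // Rough stand-in for "ARM_AM::getSOImmVal(V) != -1": V is encodable if
    // rotating it left by some even amount leaves a value that fits in 8 bits.
    static bool isSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t Undone = Rot ? (V << Rot) | (V >> (32 - Rot)) : V;
        if (Undone <= 0xFF)
          return true;
      }
      return false;
    }

    // Roughly the order ARMMaterializeInt now tries for a 32-bit constant.
    static const char *pickMaterialization(int32_t C, bool HasV6T2) {
      if (HasV6T2 && (uint32_t)C <= 0xFFFF)          return "MOVi16 (movw)";
      if (HasV6T2 && C < 0 && isSOImm(~(uint32_t)C)) return "MVNi";
      return "constant-pool load";
    }

    int main() {
      for (int32_t C : {42, 0x1234, -1, -256, 0x12345678})
        std::printf("%11d -> %s\n", C, pickMaterialization(C, true));
      return 0;
    }
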
@@ -586,44 +617,69 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Reloc::Model RelocM = TM.getRelocationModel();
// TODO: Need more magic for ARM PIC.
- if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = TD.getPrefTypeAlignment(GV->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = TD.getTypeAllocSize(GV->getType());
- }
+ if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
- // Grab index.
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
- unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
- ARMCP::CPValue,
- PCAdj);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
-
- // Load value.
- MachineInstrBuilder MIB;
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb) {
- unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx);
- if (RelocM == Reloc::PIC_)
- MIB.addImm(Id);
+
+ // Use movw+movt when possible, it avoids constant pool entries.
+ // Darwin targets don't support movt with Reloc::Static, see
+ // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
+ // static movt relocations.
+ if (Subtarget->useMovt() &&
+ Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
+ unsigned Opc;
+ switch (RelocM) {
+ case Reloc::PIC_:
+ Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+ break;
+ case Reloc::DynamicNoPIC:
+ Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
+ break;
+ default:
+ Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ break;
+ }
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg).addGlobalAddress(GV));
} else {
- // The extra immediate is for addrmode2.
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
- DestReg)
- .addConstantPoolIndex(Idx)
- .addImm(0);
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+ (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ if (isThumb2) {
+ unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
}
- AddOptionalDefs(MIB);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb)
+ if (isThumb2)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
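
The rewritten ARMMaterializeGV above prefers a movw/movt pair (MOVi32imm or one of the _ga pseudo-instructions) over a constant-pool load whenever the subtarget and relocation model allow it. Conceptually the pseudo expands into two 16-bit immediate moves; the sketch below shows only that arithmetic split and is illustrative - the real code emits relocations against the GlobalValue rather than a resolved address.

    #include <cstdint>
    #include <cstdio>

    // How a 32-bit address is split across movw/movt: movw writes bits 15:0
    // and clears 31:16, movt then writes bits 31:16.
    int main() {
      uint32_t Addr = 0x00012345;        // pretend the symbol resolved here
      uint16_t Lo = Addr & 0xFFFF;       // movw operand
      uint16_t Hi = Addr >> 16;          // movt operand
      std::printf("movw r0, #0x%04x\n", Lo);
      std::printf("movt r0, #0x%04x\n", Hi);
      std::printf("reassembled: 0x%08x\n", (unsigned)(((uint32_t)Hi << 16) | Lo));
      return 0;
    }
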
@@ -656,6 +712,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -669,10 +727,10 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// This will get lowered later into the correct offsets and registers
// via rewriteXFrameIndex.
if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -699,7 +757,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// If this is a type than can be sign or zero-extended to a basic operation
// go ahead and accept it now.
- if (VT == MVT::i8 || VT == MVT::i16)
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
@@ -813,35 +871,33 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
}
}
- // Materialize the global variable's address into a reg which can
- // then be used later to load the variable.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
- unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
- if (Tmp == 0) return false;
-
- Addr.Base.Reg = Tmp;
- return true;
- }
-
// Try to get this in a register if nothing else has worked.
if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
bool needsLowering = false;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Unhandled load/store type!");
+ default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- // Integer loads/stores handle 12-bit offsets.
- needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ if (!useAM3) {
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ // Handle negative offsets.
+ if (needsLowering && isThumb2)
+ needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+ Addr.Offset > -256);
+ } else {
+ // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
+ needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+ }
break;
case MVT::f32:
case MVT::f64:
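
The hunk above distinguishes addressing modes when deciding whether an offset must be lowered into a separate register: the i12 forms take an unsigned 12-bit offset, Thumb2 can still use the i8 forms for small negative offsets (down to -255), and the addrmode3 forms used for halfwords and signed bytes take +/-255. A hedged sketch of that predicate (names are illustrative, not the LLVM API):

    #include <cstdio>

    // Mirror of the "does this offset need lowering?" test in ARMSimplifyAddress.
    static bool offsetNeedsLowering(int Offset, bool UseAM3, bool IsThumb2,
                                    bool HasV6T2) {
      if (UseAM3)
        // Halfword load/stores and signed byte loads: +/- 8-bit offset.
        return Offset > 255 || Offset < -255;

      // i12 forms: unsigned 12-bit offset only.
      bool Needs = (Offset & 0xfff) != Offset;
      // Thumb2 keeps the i8 forms for small negative offsets.
      if (Needs && IsThumb2 && HasV6T2 && Offset < 0 && Offset > -256)
        Needs = false;
      return Needs;
    }

    int main() {
      std::printf("%d\n", offsetNeedsLowering(4095, false, false, false)); // 0
      std::printf("%d\n", offsetNeedsLowering(-4,   false, true,  true));  // 0
      std::printf("%d\n", offsetNeedsLowering(-4,   false, false, false)); // 1
      std::printf("%d\n", offsetNeedsLowering(300,  true,  false, false)); // 1
      return 0;
    }
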
@@ -854,11 +910,11 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -877,7 +933,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags) {
+ unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
@@ -897,60 +953,127 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
MIB.addMemOperand(MMO);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
}
AddOptionalDefs(MIB);
}
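
The useAM3 path above packs the offset as an 8-bit magnitude plus a flag bit (0x100) marking a negative, i.e. subtracted, offset - the same shape ARM_AM::getAM3Opc produces. A small illustrative round-trip of that encoding (it assumes the offset was already legalized by ARMSimplifyAddress):

    #include <cassert>
    #include <cstdio>

    // Sketch of the addrmode3 immediate used for ARM halfword load/stores and
    // signed byte loads: low 8 bits hold the magnitude, bit 8 means "subtract".
    static unsigned encodeAM3Offset(int Offset) {
      assert(Offset > -256 && Offset < 256 && "offset must already be legal");
      return Offset < 0 ? (0x100u | (unsigned)-Offset) : (unsigned)Offset;
    }

    static int decodeAM3Offset(unsigned Imm) {
      int Mag = (int)(Imm & 0xFF);
      return (Imm & 0x100) ? -Mag : Mag;
    }

    int main() {
      for (int Off : {0, 7, 255, -1, -255}) {
        unsigned Imm = encodeAM3Offset(Off);
        std::printf("offset %4d -> imm 0x%03x -> %4d\n", Off, Imm,
                    decodeAM3Offset(Imm));
      }
      return 0;
    }
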
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
-
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
- TargetRegisterClass *RC;
+ bool useAM3 = false;
+ bool needVMOV = false;
+ const TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
- case MVT::i16:
- Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ case MVT::i1:
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+ else
+ Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+ } else {
+ if (isZExt) {
+ Opc = ARM::LDRBi12;
+ } else {
+ Opc = ARM::LDRSB;
+ useAM3 = true;
+ }
+ }
RC = ARM::GPRRegisterClass;
break;
- case MVT::i8:
- Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ case MVT::i16:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+ else
+ Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+ } else {
+ Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+ useAM3 = true;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::i32:
- Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = ARM::t2LDRi8;
+ else
+ Opc = ARM::t2LDRi12;
+ } else {
+ Opc = ARM::LDRi12;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ if (allocReg)
+ ResultReg = createResultReg(RC);
+ assert (ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to a regular
+ // load. Now we must move from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
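
For f32 loads with alignment below 4, the code above falls back to an i32 load into a GPR followed by a VMOVSR into an S register, since VLDRS wants a word-aligned address. A source-level analogue of that two-step idea, purely illustrative:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Read a possibly unaligned float: integer-sized copy first (the "i32
    // load"), then move the bits into a float (the "VMOVSR" step).
    static float loadUnalignedFloat(const unsigned char *P) {
      uint32_t Bits;
      std::memcpy(&Bits, P, sizeof(Bits));
      float F;
      std::memcpy(&F, &Bits, sizeof(F));
      return F;
    }

    int main() {
      unsigned char Buf[8] = {};
      float V = 3.5f;
      std::memcpy(Buf + 1, &V, sizeof(V));   // store at an odd, unaligned address
      std::printf("%f\n", loadUnalignedFloat(Buf + 1));
      return 0;
    }
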
@@ -969,51 +1092,92 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
+ bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
ARM::GPRRegisterClass);
- unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
} // Fallthrough here.
case MVT::i8:
- StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRBi8;
+ else
+ StrOpc = ARM::t2STRBi12;
+ } else {
+ StrOpc = ARM::STRBi12;
+ }
break;
case MVT::i16:
- StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRHi8;
+ else
+ StrOpc = ARM::t2STRHi12;
+ } else {
+ StrOpc = ARM::STRH;
+ useAM3 = true;
+ }
break;
case MVT::i32:
- StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRi8;
+ else
+ StrOpc = ARM::t2STRi12;
+ } else {
+ StrOpc = ARM::STRi12;
+ }
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg, getKillRegState(true));
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ .addReg(SrcReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
return true;
}
@@ -1039,7 +1203,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1099,30 +1264,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// If we can, avoid recomputing the compare - redoing it could lead to wonky
// behavior.
- // TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- MVT SourceVT;
- Type *Ty = CI->getOperand(0)->getType();
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())
- && isTypeLegal(Ty, SourceVT)) {
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
- return false;
-
- unsigned CmpOpc;
- switch (SourceVT.SimpleTy) {
- default: return false;
- // TODO: Verify compares.
- case MVT::f32:
- CmpOpc = ARM::VCMPES;
- break;
- case MVT::f64:
- CmpOpc = ARM::VCMPED;
- break;
- case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- break;
- }
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
@@ -1137,23 +1280,11 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// We may not handle every CC for now.
if (ARMPred == ARMCC::AL) return false;
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
-
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
-
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
-
- // For floating point we need to move the result to a comparison register
- // that we can then use for branches.
- if (isFloat)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::FMSTAT)));
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1164,7 +1295,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
MVT SourceVT;
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
@@ -1176,7 +1307,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
@@ -1184,6 +1315,12 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
}
unsigned CmpReg = getRegForValue(BI->getCondition());
@@ -1196,7 +1333,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1206,7 +1343,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1214,70 +1351,155 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
+bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0) return false;
- MVT VT;
- Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
+ unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(AddrReg));
+ return true;
+}
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcVT = TLI.getValueType(Ty, true);
+ if (!SrcVT.isSimple()) return false;
+
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // then a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
+ isNegativeImm = true;
+ Imm = -Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ UseImm = true;
+ }
+
unsigned CmpOpc;
- unsigned CondReg;
- switch (VT.SimpleTy) {
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
- CmpOpc = ARM::VCMPES;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
- CmpOpc = ARM::VCMPED;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- CondReg = ARM::CPSR;
+ if (isThumb2) {
+ if (!UseImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ } else {
+ if (!UseImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ }
break;
}
- // Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
- // We may not handle every CC for now.
- if (ARMPred == ARMCC::AL) return false;
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (SrcReg1 == 0) return false;
+ if (!UseImm) {
+ SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (SrcReg2 == 0) return false;
+ }
+ }
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
+ if (!UseImm) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(SrcReg1);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(Imm);
+ AddOptionalDefs(MIB);
+ }
// For floating point we need to move the result to a comparison register
// that we can then use for branches.
- if (isFloat)
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::FMSTAT)));
+ return true;
+}
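
A hedged sketch of the integer immediate-compare decision ARMEmitCmp now makes: small negative constants are negated and compared with CMN, except INT_MIN, whose negation is not representable as a signed 32-bit value (and which happens to be directly encodable anyway). The isSOImm helper only approximates ARM_AM::getSOImmVal for the A32 case.

    #include <cstdint>
    #include <cstdio>

    // Approximate A32 modified-immediate test (8-bit value rotated right by
    // an even amount); not the LLVM helper.
    static bool isSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t Undone = Rot ? (V << Rot) | (V >> (32 - Rot)) : V;
        if (Undone <= 0xFF)
          return true;
      }
      return false;
    }

    // Decide how "icmp reg, C" could be emitted for a 32-bit signed constant.
    static const char *pickCompare(int32_t C) {
      bool Negate = C < 0 && C != INT32_MIN;   // -INT32_MIN would overflow
      uint32_t Imm = Negate ? (uint32_t)-C : (uint32_t)C;
      if (!isSOImm(Imm))
        return "materialize the constant, then CMPrr";
      return Negate ? "CMNzri (compare by adding the negated value)" : "CMPri";
    }

    int main() {
      for (int32_t C : {0, 255, -1, -255, INT32_MIN, 123456})
        std::printf("%12d -> %s\n", C, pickCompare(C));
      return 0;
    }
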
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
// Now set a register based on the comparison. Explicitly set the predicates
// here.
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
- TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass;
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned DestReg = createResultReg(RC);
- Constant *Zero
- = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
- .addImm(ARMPred).addReg(CondReg);
+ .addImm(ARMPred).addReg(ARM::CPSR);
UpdateValueMap(I, DestReg);
return true;
@@ -1321,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1330,21 +1552,30 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
if (!isTypeLegal(Ty, DstVT))
return false;
- // FIXME: Handle sign-extension where necessary.
- if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- unsigned Op = getRegForValue(I->getOperand(0));
- if (Op == 0) return false;
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ EVT DestVT = MVT::i32;
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ /*isZExt*/!isSigned);
+ if (SrcReg == 0) return false;
+ }
// The conversion routine works on fp-reg to fp-reg and the operand above
// was an integer, move it to the fp registers if possible.
- unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
if (FP == 0) return false;
unsigned Opc;
- if (Ty->isFloatTy()) Opc = ARM::VSITOS;
- else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
+ else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1355,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
return true;
}
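
SelectIToFP above now accepts i8 and i16 sources by widening them to i32 first (sign- or zero-extending to match the instruction) and only then converting with VSITOS/VUITOS or their f64 counterparts. A source-level analogue of that two-step conversion, illustrative only:

    #include <cstdint>
    #include <cstdio>

    // i8 -> float the way the fast-isel path does it: widen to i32, then
    // convert.  The signedness of the extension and of the conversion opcode
    // (VSITOS vs. VUITOS) must agree.
    static float i8ToFloat(uint8_t Bits, bool IsSigned) {
      if (IsSigned) {
        int32_t Wide = (int8_t)Bits;   // sign-extend (SXTB)
        return (float)Wide;            // VSITOS
      }
      uint32_t Wide = Bits;            // zero-extend (UXTB)
      return (float)Wide;              // VUITOS
    }

    int main() {
      std::printf("%f %f\n", i8ToFloat(0xF0, true), i8ToFloat(0xF0, false));
      // expected: -16.000000 240.000000
      return 0;
    }
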
-bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1369,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
- if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
- else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
+ else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
- // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
ResultReg)
@@ -1401,22 +1632,54 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
if (CondReg == 0) return false;
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
- unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ // Check to see if we can use an immediate in the conditional move.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
+ assert (VT == MVT::i32 && "Expecting an i32.");
+ Imm = (int)ConstInt->getValue().getZExtValue();
+ if (Imm < 0) {
+ isNegativeImm = true;
+ Imm = ~Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+
+ unsigned Op2Reg = 0;
+ if (!UseImm) {
+ Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+ }
+
+ unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CondReg).addImm(1));
+ .addReg(CondReg).addImm(0));
+
+ unsigned MovCCOpc;
+ if (!UseImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
+ } else {
+ if (!isNegativeImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ } else {
+ MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
+ }
+ }
unsigned ResultReg = createResultReg(RC);
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg)
- .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::SelectSDiv(const Instruction *I) {
+bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1430,21 +1693,21 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) {
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SDIV_I8;
+ LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SDIV_I16;
+ LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SDIV_I32;
+ LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SDIV_I64;
+ LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SDIV_I128;
+ LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectSRem(const Instruction *I) {
+bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1452,21 +1715,59 @@ bool ARMFastISel::SelectSRem(const Instruction *I) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SREM_I8;
+ LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SREM_I16;
+ LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SREM_I32;
+ LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SREM_I64;
+ LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SREM_I128;
+ LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
+ break;
+ case ISD::OR:
+ Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
+ break;
+ case ISD::SUB:
+ Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // TODO: Often the 2nd operand is an immediate, which can be encoded directly
+ // in the instruction, rather than materializing the value in a register.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
EVT VT = TLI.getValueType(I->getType(), true);
// We can get here in the case when we want to use NEON for our fp
@@ -1478,12 +1779,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
if (isFloat && !Subtarget->hasVFP2())
return false;
- unsigned Op1 = getRegForValue(I->getOperand(0));
- if (Op1 == 0) return false;
-
- unsigned Op2 = getRegForValue(I->getOperand(1));
- if (Op2 == 0) return false;
-
unsigned Opc;
bool is64bit = VT == MVT::f64 || VT == MVT::i64;
switch (ISDOpcode) {
@@ -1498,6 +1793,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
Opc = is64bit ? ARM::VMULD : ARM::VMULS;
break;
}
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
@@ -1508,18 +1809,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
- EVT SrcVT, unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
- Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
- return false;
-}
-
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
@@ -1536,7 +1825,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1548,11 +1837,6 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
- case CallingConv::GHC:
- if (Return)
- llvm_unreachable("Can't return in GHC call convention");
- else
- return CC_ARM_APCS_GHC;
}
}
@@ -1567,6 +1851,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At this point, we are able to handle the call's arguments in fast isel.
+
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1582,41 +1908,26 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON/vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
- return false;
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
break;
}
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
- break;
- }
- case CCValAssign::AExt: {
- bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
+ assert (Arg != 0 && "Failed to emit a zext");
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -1634,16 +1945,17 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg())
- .addReg(Arg);
+ .addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
- if (VA.getLocVT() != MVT::f64) return false;
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
CCValAssign &NextVA = ArgLocs[++i];
- // TODO: Only handle register args for now.
- if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1659,9 +1971,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
Addr.Base.Reg = ARM::SP;
Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
}
}
+
return true;
}
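
ProcessCallArgs above now validates every argument before emitting any code, and the i8/i16 promotion cases widen values to i32 with ARMEmitIntExt according to the SExt/ZExt (or AExt) location info. A source-level analogue of what that promotion means for the bits that travel in the argument register, illustrative only:

    #include <cstdint>
    #include <cstdio>

    // An i8 argument occupies a full 32-bit register; CCValAssign::SExt and
    // ZExt decide how the upper 24 bits are filled.
    static uint32_t promoteI8Arg(uint8_t V, bool IsSExt) {
      return IsSExt ? (uint32_t)(int32_t)(int8_t)V   // sign-extend
                    : (uint32_t)V;                   // zero-extend
    }

    int main() {
      std::printf("sext: 0x%08x\n", promoteI8Arg(0x80, true));   // 0xffffff80
      std::printf("zext: 0x%08x\n", promoteI8Arg(0x80, false));  // 0x00000080
      return 0;
    }
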
@@ -1685,7 +1999,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// For this move we copy into two registers and then move into the
// double fp reg we want.
EVT DestVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVDRR), ResultReg)
@@ -1700,7 +2014,12 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
EVT CopyVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
unsigned ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1753,13 +2072,26 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVVT = TLI.getValueType(RV->getType());
+ EVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ // Perform extension if flagged as either zext or sext. Otherwise, do
+ // nothing.
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
+ if (SrcReg == 0) return false;
+ }
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
@@ -1772,20 +2104,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
MRI.addLiveOut(VA.getLocReg());
}
- unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(RetOpc)));
return true;
}
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
-
- // Darwin needs the r9 versions of the opcodes.
- bool isDarwin = Subtarget->isTargetDarwin();
- if (isThumb) {
- return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ if (isThumb2) {
+ return ARM::tBL;
} else {
- return isDarwin ? ARM::BLr9 : ARM::BL;
+ return ARM::BL;
}
}
@@ -1844,11 +2173,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
- if(isThumb)
+ if (isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
@@ -1863,6 +2191,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1873,12 +2205,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return true;
}
-bool ARMFastISel::SelectCall(const Instruction *I) {
+bool ARMFastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = 0) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
- // Can't handle inline asm or worry about intrinsics yet.
- if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee)) return false;
// Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@@ -1902,7 +2235,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
// TODO: For now if we have long calls specified we don't handle the call.
@@ -1913,16 +2247,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgRegs.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
+ // If we're lowering a memory intrinsic instead of a regular call, skip the
+ // last two arguments, which shouldn't be passed to the underlying function.
+ if (IntrMemName && e-i <= 2)
+ break;
- if (Arg == 0)
- return false;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1930,7 +2266,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- // FIXME: Only handle *easy* calls for now.
+ // FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
@@ -1939,8 +2275,14 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
Type *ArgTy = (*i)->getType();
MVT ArgVT;
- if (!isTypeLegal(ArgTy, ArgVT))
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
@@ -1956,26 +2298,38 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb)
+ if(isThumb2) {
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)))
- .addGlobalAddress(GV, 0, 0);
- else
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0));
-
+ TII.get(CallOpc)));
+ if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
+ } else {
+ if (!IntrMemName)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+ else
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(IntrMemName, 0));
+ }
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1984,83 +2338,187 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
+}
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+ return Len <= 16;
}
-bool ARMFastISel::SelectIntCast(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Op = I->getOperand(0);
- Type *SrcTy = Op->getType();
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!ARMIsMemCpySmall(Len))
+ return false;
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
- if (isa<TruncInst>(I)) {
- if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
- return false;
- if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ bool RV;
+ unsigned ResultReg;
+ RV = ARMEmitLoad(VT, ResultReg, Src);
+ assert (RV == true && "Should be able to handle this load.");
+ RV = ARMEmitStore(VT, ResultReg, Dest);
+ assert (RV == true && "Should be able to handle this store.");
+ (void)RV;
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
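
A hedged sketch of the chunking loop above: when the length is a small compile-time constant (at most 16 bytes, per ARMIsMemCpySmall), the copy is expanded into a handful of i32/i16/i8 load/store pairs instead of a libcall. The real code emits MachineInstrs; memcpy below merely stands in for one load/store pair.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Widest-unit-first chunking, mirroring ARMTryEmitSmallMemCpy.
    static bool smallMemCpy(unsigned char *Dst, const unsigned char *Src,
                            uint64_t Len) {
      if (Len > 16)        // ARMIsMemCpySmall threshold
        return false;      // caller should fall back to a call to memcpy
      while (Len) {
        unsigned Size = Len >= 4 ? 4 : Len >= 2 ? 2 : 1;   // i32 / i16 / i8
        std::memcpy(Dst, Src, Size);
        Dst += Size; Src += Size; Len -= Size;
      }
      return true;
    }

    int main() {
      char Src[] = "tiny memcpy!";
      char Dst[16] = {};
      smallMemCpy((unsigned char *)Dst, (const unsigned char *)Src, sizeof(Src));
      std::printf("%s\n", Dst);
      return 0;
    }
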
+
+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ // Don't handle volatile.
+ if (MTI.isVolatile())
return false;
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // we would emit dead code because we don't currently handle memmoves.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ // Small memcpy's are common enough that we want to do them without a call
+ // if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemCpySmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ return true;
+ }
+ }
+
+ if (!MTI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ return false;
- // Because the high bits are undefined, a truncate doesn't generate
- // any code.
- UpdateValueMap(I, SrcReg);
- return true;
+ const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+ return SelectCall(&I, IntrMemName);
}
- if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+ // Don't handle volatile.
+ if (MSI.isVolatile())
+ return false;
+
+ if (!MSI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return SelectCall(&I, "memset");
+ }
+ }
+}
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+
unsigned Opc;
- bool isZext = isa<ZExtInst>(I);
bool isBoolZext = false;
- if (!SrcVT.isSimple())
- return false;
+ if (!SrcVT.isSimple()) return 0;
switch (SrcVT.getSimpleVT().SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i16:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
- Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
+ Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case MVT::i8:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
- Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
+ Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case MVT::i1:
- if (isZext) {
- Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ if (isZExt) {
+ Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
}
- return false;
+ return 0;
}
- // FIXME: We could save an instruction in many cases by special-casing
- // load instructions.
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
if (isBoolZext)
MIB.addImm(1);
else
MIB.addImm(0);
AddOptionalDefs(MIB);
- UpdateValueMap(I, DestReg);
+ return ResultReg;
+}
+
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
return true;
}
@@ -2074,6 +2532,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
case Instruction::ICmp:
case Instruction::FCmp:
return SelectCmp(I);
@@ -2082,42 +2542,105 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::SIToFP:
- return SelectSIToFP(I);
+ return SelectIToFP(I, /*isSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*isSigned*/ false);
case Instruction::FPToSI:
- return SelectFPToSI(I);
+ return SelectFPToI(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*isSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::FAdd:
- return SelectBinaryOp(I, ISD::FADD);
+ return SelectBinaryFPOp(I, ISD::FADD);
case Instruction::FSub:
- return SelectBinaryOp(I, ISD::FSUB);
+ return SelectBinaryFPOp(I, ISD::FSUB);
case Instruction::FMul:
- return SelectBinaryOp(I, ISD::FMUL);
+ return SelectBinaryFPOp(I, ISD::FMUL);
case Instruction::SDiv:
- return SelectSDiv(I);
+ return SelectDiv(I, /*isSigned*/ true);
+ case Instruction::UDiv:
+ return SelectDiv(I, /*isSigned*/ false);
case Instruction::SRem:
- return SelectSRem(I);
+ return SelectRem(I, /*isSigned*/ true);
+ case Instruction::URem:
+ return SelectRem(I, /*isSigned*/ false);
case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return SelectIntrinsicCall(*II);
return SelectCall(I);
case Instruction::Select:
return SelectSelect(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
+ return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
- return SelectIntCast(I);
+ return SelectIntExt(I);
default: break;
}
return false;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// successful.
+bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+  //   ldrb r1, [r0]            ldrb r1, [r0]
+  //   uxtb r2, r1          =>
+  //   mov  r3, r2              mov  r3, r1
+ bool isZExt = true;
+ switch(MI->getOpcode()) {
+ default: return false;
+ case ARM::SXTH:
+ case ARM::t2SXTH:
+ isZExt = false;
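+    // Fall through to the UXTH case for the i16 type check.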
+ case ARM::UXTH:
+ case ARM::t2UXTH:
+ if (VT != MVT::i16)
+ return false;
+ break;
+ case ARM::SXTB:
+ case ARM::t2SXTB:
+ isZExt = false;
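+    // Fall through to the UXTB case for the i8 type check.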
+ case ARM::UXTB:
+ case ARM::t2UXTB:
+ if (VT != MVT::i8)
+ return false;
+ break;
+ }
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ return false;
+ MI->eraseFromParent();
+ return true;
+}
+
namespace llvm {
- llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
- // Completely untested on non-darwin.
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
!DisableARMFastISel)
return new ARMFastISel(funcInfo);
return 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 412751b84d41..402ecb0c5ffd 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,31 +15,40 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+ cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs);
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetDarwin())
+  // iOS requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetIOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -72,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
@@ -81,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
- const unsigned *CSRegs) {
+ const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
if (MI->getOpcode() == ARM::LDMIA_RET ||
MI->getOpcode() == ARM::t2LDMIA_RET ||
@@ -140,10 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// belongs to which callee-save spill areas.
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
-
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
+ int D8SpillFI = 0;
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
@@ -177,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -186,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
}
}
@@ -195,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (GPRCS1Size > 0) MBBI++;
// Set FP to point to the stack slot that contains the previous FP.
- // For Darwin, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
bool HasFP = hasFP(MF);
@@ -232,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI++;
}
- NumBytes = DPRCSOffset;
+ // Move past the aligned DPRCS2 area.
+ if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+ MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+ // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+ // leaves the stack pointer pointing to the DPRCS2 area.
+ //
+ // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+ NumBytes += MFI->getObjectOffset(D8SpillFI);
+ } else
+ NumBytes = DPRCSOffset;
+
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -259,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// If we need dynamic stack realignment, do it here. Be paranoid and make
// sure if we also have VLAs, we have a base pointer for frame access.
- if (RegInfo->needsStackRealignment(MF)) {
+ // If aligned NEON registers were spilled, the stack has already been
+ // realigned.
+ if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
@@ -315,8 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -332,16 +354,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
-
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
--MBBI;
@@ -365,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARMCC::AL, 0, TII);
else {
// It's not possible to restore SP from FP in a single instruction.
- // For Darwin, this looks like:
+ // For iOS, this looks like:
// mov sp, r7
// sub sp, #24
// This is bad, if an interrupt is taken after the mov, sp is in an
@@ -404,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
- unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
- ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
- : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode = STI.isThumb() ?
+ (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+ ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -431,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
- } else if (RetOpcode == ARM::TCRETURNriND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
- addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = prior(MBBI);
@@ -481,6 +494,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
return Offset - AFI->getDPRCalleeSavedAreaOffset();
+ // SP can move around if there are allocas. We may also lose track of SP
+ // when emergency spilling inside a non-reserved call frame setup.
+ bool hasMovingSP = !hasReservedCallFrame(MF);
+
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
@@ -488,7 +505,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
@@ -500,11 +517,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (hasFP(MF) && AFI->hasStackFrame()) {
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs (and thus the SP isn't reliable as a base).
- if (isFixed || (MFI->hasVarSizedObjects() &&
- !RegInfo->hasBasePointer(MF))) {
+ if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
if (AFI->isThumb2Function()) {
// Try to use the frame pointer if we can, else use the base pointer
@@ -551,6 +567,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned StmOpc, unsigned StrOpc,
bool NoGap,
bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
@@ -564,7 +581,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // D-registers in the aligned area DPRCS2 are NOT spilled here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
// Add the callee-saved register as live-in unless it's LR and
// @llvm.returnaddress is called. If LR is returned for
@@ -614,16 +635,15 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
unsigned LdmOpc, unsigned LdrOpc,
bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const {
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri ||
- RetOpcode == ARM::TCRETURNriND);
+ RetOpcode == ARM::TCRETURNri);
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -632,7 +652,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // The aligned reloads from area DPRCS2 are not inserted here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
Reg = ARM::PC;
@@ -686,6 +710,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
}
}
+/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. Also insert stack realignment code and leave the stack
+/// pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // Mark the D-register spill slots as properly aligned. Since MFI computes
+ // stack slot layout backwards, this can actually mean that the d-reg stack
+ // slot offsets can be wrong. The offset for d8 will always be correct.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned DNum = CSI[i].getReg() - ARM::D8;
+ if (DNum >= 8)
+ continue;
+ int FI = CSI[i].getFrameIdx();
+    // The even-numbered registers will be 16-byte aligned; the odd-numbered
+    // registers will be 8-byte aligned.
+ MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+ // The stack slot for D8 needs to be maximally aligned because this is
+ // actually the point where we align the stack pointer. MachineFrameInfo
+ // computes all offsets relative to the incoming stack pointer which is a
+ // bit weird when realigning the stack. Any extra padding for this
+ // over-alignment is not realized because the code inserted below adjusts
+ // the stack pointer by numregs * 8 before aligning the stack pointer.
+ if (DNum == 0)
+ MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ }
+
+ // Move the stack pointer to the d8 spill slot, and align it at the same
+ // time. Leave the stack slot address in the scratch register r4.
+ //
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ //
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+ AFI->setShouldRestoreSPFromFP(true);
+
+ // sub r4, sp, #numregs * 8
+ // The immediate is <= 64, so it doesn't need any special encoding.
+ unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
+
+ // bic r4, r4, #align-1
+ Opc = isThumb ? ARM::t2BICri : ARM::BICri;
+ unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+
+ // mov sp, r4
+  // The stack pointer must be adjusted before spilling anything; otherwise
+  // the stack slots could be clobbered by an interrupt handler.
+  // Leave r4 live; it is used below.
+ Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+ .addReg(ARM::R4);
+ MIB = AddDefaultPred(MIB);
+ if (!isThumb)
+ AddDefaultCC(MIB);
+
+ // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+ // r4 holds the stack slot address.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+ // The writeback is only needed when emitting two vst1.64 instructions.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+ ARM::R4)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4).addImm(16).addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vst1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vstr.64 for the odd last register.
+ if (NumAlignedDPRCS2Regs) {
+ MBB.addLiveIn(NextReg);
+ // vstr.64 uses addrmode5 which has an offset scale of 4.
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ }
+
+ // The last spill instruction inserted should kill the scratch register r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
+/// iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+  // These switch cases all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+  // We won't modify r4 beyond this point. It currently points to the next
+  // register to be reloaded.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+  // The last reload kills r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -700,12 +965,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned PushOneOpc = AFI->isThumbFunction() ?
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- MachineInstr::FrameSetup);
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
return true;
}
@@ -720,15 +992,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register);
+ &isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register);
+ &isARMArea1Register, 0);
return true;
}
@@ -852,6 +1131,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
return Limit;
}
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!RegInfo->canRealignStack(MF))
+ return;
+
+  // We always spill contiguous d-registers starting from d8. Count how many
+  // need spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
+
void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -898,28 +1226,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUsed(ARM::R4);
}
+ // See if we can spill vector registers to aligned stack.
+ checkNumAlignedDPRCS2Regs(MF);
+
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases =
- RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
- if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
- Spilled = true;
- CanEliminateFrame = false;
- }
- }
}
if (!ARM::GPRRegisterClass->contains(Reg))
@@ -928,7 +1250,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -948,7 +1270,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 61bb8afa40f2..a1c2b93562c9 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -63,12 +63,13 @@ public:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
unsigned StrOpc, bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags = 0) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const;
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a279187..a5fd15b6bb97 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d8566b..98bfc4cf0cc5 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5ee009c04c5b..1eafbbc8f64d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,6 +244,7 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -923,7 +924,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
// The maximum alignment is equal to the memory size being referenced.
unsigned LSNAlign = LSN->getAlignment();
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign > MemSize && MemSize > 1)
+ if (LSNAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
@@ -1549,6 +1550,52 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+ case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+ case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+ case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+ case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+ case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+ case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+ case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+
+ case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+ case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+ case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+ case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+ case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+ case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+ case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+ case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
+ case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
+ case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
+ case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
+ case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
@@ -1612,7 +1659,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1754,7 +1809,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+    // We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
@@ -1977,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
+    // Fixed-stride update instructions don't have an explicit writeback
+    // operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2116,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
default:
llvm_unreachable("Unknown so_reg opcode!");
- break;
}
SDValue SOShImm =
CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
@@ -2227,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
if (Subtarget->isThumb()) {
SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
@@ -2286,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
+ default: llvm_unreachable("Illegal conditional move type!");
case MVT::i32:
Opc = Subtarget->isThumb()
? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
@@ -2303,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ SDValue CCR = N->getOperand(3);
+ assert(CCR.getOpcode() == ISD::Register);
+ SDValue InFlag = N->getOperand(4);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if (Subtarget->isThumb()) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+}
+
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2316,7 +2489,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc0 = N->getOperand(0);
SDValue XORSrc1 = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (DisableARMIntABS)
@@ -2641,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
+ case ARMISD::CAND:
+ case ARMISD::COR:
+ case ARMISD::CXOR:
+ return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -2649,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
@@ -2668,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
@@ -2715,8 +2893,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
- ARM::VLD2DUPd32Pseudo };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
@@ -2733,8 +2911,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
- ARM::VLD2DUPd32Pseudo_UPD };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
@@ -2751,24 +2929,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD1_UPD: {
- unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
- ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
- ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
- ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
@@ -2780,7 +2963,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD4_UPD: {
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
@@ -2815,24 +2998,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST1_UPD: {
- unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
- ARM::VST1d32_UPD, ARM::VST1d64_UPD };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
- ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2844,7 +3032,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
@@ -2993,14 +3181,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
- ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
- ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3054,14 +3242,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
- ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
- ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3122,14 +3310,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
case Intrinsic::arm_neon_vtbl3:
return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
case Intrinsic::arm_neon_vtbx3:
return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
@@ -3163,7 +3351,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
Ops.data(), Ops.size());
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f60d177a920b..a103c94cede4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
-#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -40,18 +39,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <sstream>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -73,7 +69,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -89,7 +85,7 @@ namespace llvm {
}
// The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -108,8 +104,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- if (ElemTy != MVT::i32) {
+ if (ElemTy == MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ } else {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
@@ -121,18 +123,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction(VT.getSimpleVT(),
- (MVT::SimpleValueType)InnerVT, Expand);
}
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -263,7 +259,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
@@ -388,8 +384,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Long long helper functions
// RTABI chapter 4.2, Table 9
setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
@@ -405,21 +399,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
// Memory operations
// RTABI chapter 4.3.4
setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
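The AEABI hunk above moves SDIV_I64/UDIV_I64 next to the other integer-division libcalls and gives them, and the memory helpers, the ARM_AAPCS calling convention. As a hedged illustration of what those entries are for (ordinary C++, not compiler code): on an AAPCS target without a 64-bit hardware divider, the division and remainder below are what get lowered to a single __aeabi_uldivmod helper call, which returns both results at once.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t N = 1000000007ull, D = 97ull;
      uint64_t Q = N / D;   // quotient
      uint64_t R = N % D;   // remainder; both come back from the same helper
      std::printf("%llu %llu\n", (unsigned long long)Q, (unsigned long long)R);
      return 0;
    }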
@@ -433,7 +434,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -441,6 +443,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -457,13 +472,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
+ // natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are always expanded, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In other words, find a case where "copysign" appears in the DAG with
+ // vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -476,13 +501,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
- setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -498,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
- // a destination type that is wider than the source.
+ // a destination type that is wider than the source, nor does it have a
+ // FP_TO_[SU]INT instruction with a destination that is narrower than the
+ // source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -519,6 +558,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16,
+ MVT::v2i32};
+ for (unsigned i = 0; i < 6; ++i) {
+ setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ }
}
computeRegisterProperties();
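The block above re-legalizes extending loads whose memory type is a small integer vector (v4i8 and friends) after the earlier loop marked every vector extload and truncating store Expand. A hedged scalar model of the access pattern this helps, under the assumption that NEON can perform the widening with a load plus a vmovl-style lengthening move rather than scalarizing:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t Src[4] = {1, 2, 250, 255};
      uint32_t Dst[4];
      for (int I = 0; I < 4; ++I)
        Dst[I] = Src[I];           // zero-extending load: u8 lane -> u32 lane
      std::printf("%u %u %u %u\n", Dst[0], Dst[1], Dst[2], Dst[3]);
      return 0;
    }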
@@ -576,6 +625,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -606,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
+
+ if (!Subtarget->isTargetDarwin()) {
+ // Non-Darwin platforms may return values in these registers via the
+ // personality function.
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+ }
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
@@ -664,7 +722,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -676,7 +735,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
@@ -703,18 +761,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -735,20 +796,27 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
- if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
- setTargetDAGCombine(ISD::OR);
- if (Subtarget->hasNEON())
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+ }
+
+ if (Subtarget->hasV6Ops())
+ setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+ maxStoresPerMemset = 16;
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -828,7 +896,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CAND: return "ARMISD::CAND";
+ case ARMISD::COR: return "ARMISD::COR";
+ case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -851,7 +923,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
- case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -899,6 +970,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -949,7 +1021,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -984,7 +1056,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
- return Sched::Latency;
+ return Sched::ILP;
}
if (!N->isMachineOpcode())
@@ -999,7 +1071,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
- return Sched::Latency;
+ return Sched::ILP;
return Sched::RegPressure;
}
@@ -1081,18 +1153,19 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ // Fallthrough
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
- case CallingConv::GHC:
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -1215,7 +1288,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1334,7 +1407,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1350,12 +1423,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
- // TODO: Disable AlwaysInline when it becomes possible
- // to emit a nested call sequence.
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
- /*AlwaysInline=*/true,
+ /*AlwaysInline=*/false,
MachinePointerInfo(0),
MachinePointerInfo(0)));
@@ -1429,7 +1500,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1444,7 +1515,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1465,7 +1536,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1494,7 +1565,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1513,12 +1584,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
+ else if (doesNotRet && isDirect && !isARMFunc &&
+ Subtarget->hasRAS() && !Subtarget->isThumb1Only())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- CallOpc = (isDirect || Subtarget->hasV5TOps())
- ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
- : ARMISD::CALL_NOLINK;
+ if (!isDirect && !Subtarget->hasV5TOps()) {
+ CallOpc = ARMISD::CALL_NOLINK;
+ } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
@@ -1531,6 +1610,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
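The register-mask operand added above records which physical registers survive the call for the chosen calling convention, so later passes need not treat every register as clobbered. A hedged, stand-alone illustration of the idea (a plain bitset, not LLVM's packed uint32_t mask layout): a set bit simply means "the callee preserves this register".

    #include <bitset>
    #include <cstdio>

    enum Reg { R0, R1, R2, R3, R4, R5, R6, R7, NumRegs };

    int main() {
      std::bitset<NumRegs> Preserved;
      // AAPCS-style example: r4-r7 are callee-saved, r0-r3 may be clobbered.
      for (int R = R4; R <= R7; ++R)
        Preserved.set(R);
      std::printf("r3 preserved: %d, r4 preserved: %d\n",
                  (int)Preserved[R3], (int)Preserved[R4]);   // prints 0 and 1
      return 0;
    }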
@@ -1558,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
-llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
@@ -1588,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
- const ARMInstrInfo *TII) {
+ const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
@@ -1723,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const ARMInstrInfo *TII =
- ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -1852,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return result;
}
-bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
- unsigned NumCopies = 0;
- SDNode* Copies[2];
- SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::CopyToReg) {
- Copies[NumCopies++] = Use;
- } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
- for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
- Copies[UI.getUse().getResNo()] = *UI;
- ++NumCopies;
+ Copies.insert(*UI);
}
- } else if (Use->getOpcode() == ISD::BITCAST) {
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
- if (!Use->hasNUsesOfValue(1, 0))
+ if (!Copy->hasOneUse())
return false;
- Use = *Use->use_begin();
- if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Copies[NumCopies++] = Use;
+ Chain = Copy->getOperand(0);
} else {
return false;
}
- if (NumCopies != 1 && NumCopies != 2)
- return false;
-
bool HasRet = false;
- for (unsigned i = 0; i < NumCopies; ++i) {
- SDNode *Copy = Copies[i];
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- SDNode *Use = *UI;
- if (Use == Copies[0] || Use == Copies[1])
- continue;
- return false;
- }
- if (UI->getOpcode() != ARMISD::RET_FLAG)
- return false;
- HasRet = true;
- }
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls)
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
return false;
if (!CI->isTailCall())
@@ -1965,7 +2058,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1989,7 +2082,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2005,7 +2098,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -2037,7 +2131,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2045,7 +2139,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV =
@@ -2054,7 +2148,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2092,13 +2186,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2115,7 +2210,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
}
@@ -2128,7 +2223,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- // FIXME: Enable this for static codegen when tool issues are fixed.
+ // FIXME: Enable this for static codegen when tool issues are fixed. Also
+ // update ARMFastISel::ARMMaterializeGV.
if (Subtarget->useMovt() && RelocM != Reloc::Static) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -2143,7 +2239,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2163,7 +2260,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -2173,7 +2270,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, 0);
+ false, false, false, 0);
return Result;
}
@@ -2195,20 +2292,12 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
- const {
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -2253,7 +2342,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2366,7 +2455,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2385,7 +2474,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -2452,7 +2541,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SmallVector<SDValue, 4> MemOps;
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2521,7 +2610,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -2535,7 +2624,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
RC = ARM::SPRRegisterClass;
@@ -2612,7 +2701,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -2777,6 +2866,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
+ // undefined bits before doing a full-word comparison with zero.
+ Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
return DAG.getSelectCC(dl, Cond,
DAG.getConstant(0, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
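The masking added above is needed because ARM reports UndefinedBooleanContent: only bit 0 of an i1 value is meaningful, so a full-word compare against zero is safe only after isolating that bit. A hedged scalar model of the bug being avoided (ordinary C++, not DAG code):

    #include <cstdint>
    #include <cstdio>

    static int selectFullWord(uint32_t Cond, int T, int F) {
      return (Cond != 0) ? T : F;        // wrong if the upper bits are garbage
    }
    static int selectBit0(uint32_t Cond, int T, int F) {
      return ((Cond & 1) != 0) ? T : F;  // what the lowering now emits
    }

    int main() {
      uint32_t FalseWithGarbage = 0x80;  // bit 0 clear, an upper bit set
      std::printf("%d vs %d\n",
                  selectFullWord(FalseWithGarbage, 1, 2),  // 1 (wrong)
                  selectBit0(FalseWithGarbage, 1, 2));     // 2 (correct)
      return 0;
    }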
@@ -2847,7 +2941,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
llvm_unreachable("Unknown VFP cmp argument!");
}
@@ -2866,7 +2960,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
@@ -2876,7 +2970,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
- NewAlign);
+ Ld->isInvariant(), NewAlign);
return;
}
@@ -2894,12 +2988,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
- bool SeenZero = false;
- if (canChangeToInt(LHS, SeenZero, Subtarget) &&
- canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case since
- // we only care about equality comparisons.
- (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ bool LHSSeenZero = false;
+ bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
+ bool RHSSeenZero = false;
+ bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
+ if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2908,10 +3001,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
+ SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
- LHS = bitcastf32Toi32(LHS, DAG);
- RHS = bitcastf32Toi32(RHS, DAG);
+ LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(LHS, DAG), Mask);
+ RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
@@ -2922,6 +3018,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS1, RHS2;
expandf64Toi32(LHS, DAG, LHS1, LHS2);
expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
+ RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -2950,7 +3048,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3000,25 +3098,48 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(),
- false, false, 0);
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(), false, false, 0);
+ MachinePointerInfo::getJumpTable(),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
}
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+}
+
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::FP_TO_SINT:
Opc = ARMISD::FTOSI;
break;
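LowerVectorFP_TO_INT above covers the case described in the constructor hunk: NEON has no FP_TO_[SU]INT with a destination narrower than the source, so v4f32 to v4i16 is done as a convert to v4i32 followed by a truncate. A hedged scalar model of that two-step lowering:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const float In[4] = {1.9f, -2.5f, 100.0f, 7.0f};
      int16_t Out[4];
      for (int I = 0; I < 4; ++I) {
        int32_t Wide = static_cast<int32_t>(In[I]);  // FP_TO_SINT into i32 lanes
        Out[I] = static_cast<int16_t>(Wide);         // TRUNCATE to i16 lanes
      }
      std::printf("%d %d %d %d\n", Out[0], Out[1], Out[2], Out[3]);
      return 0;
    }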
@@ -3034,6 +3155,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+ if (VT.getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4f32)
@@ -3042,8 +3169,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned CastOpc;
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
CastOpc = ISD::SIGN_EXTEND;
Opc = ISD::SINT_TO_FP;
@@ -3067,8 +3193,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
Opc = ARMISD::SITOF;
break;
@@ -3176,7 +3301,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -3197,7 +3322,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -3442,7 +3567,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal FP comparison"); break;
+ default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE: Invert = true; // Fallthrough
case ISD::SETOEQ:
@@ -3481,7 +3606,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
} else {
// Integer comparisons.
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal integer comparison"); break;
+ default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true;
@@ -3688,14 +3813,65 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
default:
llvm_unreachable("unexpected size for isNEONModifiedImm");
- return SDValue();
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
-static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal != -1) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // If that fails, try a VMOV.i32
+ EVT VMovVT;
+ unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
+ SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
+ VMOVModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
+ VMVNModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+
+static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
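The new LowerConstantFP above tries, in order, a VMOV.f32 immediate, a VMOV.i32 modified immediate, and a VMVN.i32 before returning SDValue() and letting the generic lowering (typically a constant-pool load) handle the value. A hedged sketch of the first check, assuming the VFPv3 8-bit immediate can encode exactly the values +/-(n/16) * 2^r with n in [16,31] and r in [-3,4]; this is an illustration, not ARM_AM::getFP32Imm:

    #include <cmath>
    #include <cstdio>

    static bool fitsVMOVf32Imm(float F) {
      float Mag = std::fabs(F);
      for (int R = -3; R <= 4; ++R)
        for (int N = 16; N <= 31; ++N)
          if (Mag == std::ldexp(static_cast<float>(N) / 16.0f, R))
            return true;
      return false;
    }

    int main() {
      // 1.0 and -0.5 are encodable; 0.1 is not, so it falls through to the
      // VMOV.i32 / VMVN.i32 / constant-pool alternatives.
      std::printf("%d %d %d\n", fitsVMOVf32Imm(1.0f), fitsVMOVf32Imm(-0.5f),
                  fitsVMOVf32Imm(0.1f));
      return 0;
    }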
@@ -3734,8 +3910,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
-static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned BlockSize) {
+static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
@@ -3761,15 +3936,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask
// of 8 elements can work here.
return VT == MVT::v8i8 && M.size() == 8;
}
-static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3787,8 +3961,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
-static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3803,8 +3976,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3827,8 +3999,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
-static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3852,8 +4023,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3878,8 +4048,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
-static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3955,6 +4124,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
}
}
@@ -4302,7 +4480,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
}
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
- SmallVectorImpl<int> &ShuffleMask,
+ ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
@@ -4310,7 +4488,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
DebugLoc DL = Op.getDebugLoc();
SmallVector<SDValue, 8> VTBLMask;
- for (SmallVectorImpl<int>::iterator
+ for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
@@ -4330,7 +4508,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- SmallVector<int, 8> ShuffleMask;
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
@@ -4338,7 +4515,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of inconsistencies between legalization and selection.
// FIXME: floating-point vectors should be canonicalized to integer vectors
// of the same time so that they get CSEd properly.
- SVN->getMask(ShuffleMask);
+ ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 32) {
@@ -4347,9 +4524,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
+ // Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i32));
}
@@ -4450,6 +4642,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // INSERT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ return Op;
+}
+
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
@@ -4526,11 +4727,10 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
- int64_t SExtVal = C->getSExtValue();
- if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
- if ((C->getZExtValue() >> HalfSize) != 0)
+ if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
@@ -4569,7 +4769,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4874,7 +5075,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = ARMISD::ADDC; break;
case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
case ISD::SUBC: Opc = ARMISD::SUBC; break;
@@ -4959,7 +5160,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
- case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
@@ -4971,8 +5171,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -4986,7 +5188,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
}
- return SDValue();
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -4998,7 +5199,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- break;
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
@@ -5194,7 +5394,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5304,7 +5504,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5414,7 +5614,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
@@ -5500,52 +5700,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- MachineFunction &MF = *MI->getParent()->getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
- if (!RI.hasBasePointer(MF)) return;
-
- MachineBasicBlock::iterator MBBI = MI;
-
- int32_t NumBytes = AFI->getFramePtrSpillOffset();
- unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
- "Base pointer without frame pointer?");
-
- if (AFI->isThumb2Function())
- llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
- else if (AFI->isThumbFunction())
- llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *AII, RI);
- else
- llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
- if (!RI.needsStackRealignment(MF)) return;
-
- // If there's dynamic realignment, adjust for it.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- assert(!AFI->isThumb1OnlyFunction());
-
- // Emit bic r6, r6, MaxAlign
- unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)));
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -5580,8 +5734,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore, 4, 4);
- EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
@@ -5683,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
MachineModuleInfo &MMI = MF->getMMI();
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
if (!BB->isLandingPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
@@ -5741,12 +5894,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
- // Insert and renumber MBBs.
- MachineBasicBlock *Last = &MF->back();
+  // Insert the MBBs.
MF->insert(MF->end(), DispatchBB);
MF->insert(MF->end(), DispContBB);
MF->insert(MF->end(), TrapBB);
- MF->RenumberBlocks(Last);
// Insert code into the entry block that creates and registers the function
// context.
@@ -5757,35 +5908,63 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
+ if (AFI->isThumb1OnlyFunction())
+ BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
+ else if (!Subtarget->hasVFP2())
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
+ else
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ } else {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ }
+
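As an aside, the t2MOVi16/t2MOVTi16 pair used above (and again in the plain ARM path further down) materializes a 32-bit landing-pad count in two 16-bit halves. A minimal host-side check of that split, with a made-up count:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t numLPads = 0x00012345;          // hypothetical count needing > 16 bits
      uint32_t reg = numLPads & 0xFFFF;        // t2MOVi16 writes bits [15:0]
      if ((numLPads & 0xFFFF0000) != 0)        // t2MOVTi16 fills bits [31:16]
        reg = ((numLPads >> 16) << 16) | (reg & 0xFFFF);
      assert(reg == numLPads);
      return 0;
    }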
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
.addJumpTableIndex(MJTI)
.addImm(UId));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
- .addReg(NewVReg2, RegState::Kill)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ .addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
- .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
.addJumpTableIndex(MJTI)
.addImm(UId);
@@ -5796,9 +5975,30 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(1)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
@@ -5847,38 +6047,77 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId));
MachineMemOperand *JTMMOLd =
MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
MachineMemOperand::MOLoad, 4, 4);
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3)
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
.addImm(0)
.addMemOperand(JTMMOLd));
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg3)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId);
}
@@ -5893,21 +6132,24 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
PrevMBB = CurMBB;
}
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
- const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
MachineBasicBlock *BB = *I;
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
- for (MachineBasicBlock::succ_iterator
- SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SMBB = *SI;
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ while (!Successors.empty()) {
+ MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isLandingPad()) {
BB->removeSuccessor(SMBB);
- SMBB->setIsLandingPad(false);
+ MBBLPads.push_back(SMBB);
}
}
@@ -5919,7 +6161,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -5932,15 +6174,31 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineInstrBuilder MIB(&*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
- if (!TRC->contains(SavedRegs[i])) continue;
- if (!DefRegs[SavedRegs[i]])
- MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ unsigned Reg = SavedRegs[i];
+ if (Subtarget->isThumb2() &&
+ !ARM::tGPRRegisterClass->contains(Reg) &&
+ !ARM::hGPRRegisterClass->contains(Reg))
+ continue;
+ else if (Subtarget->isThumb1Only() &&
+ !ARM::tGPRRegisterClass->contains(Reg))
+ continue;
+ else if (!Subtarget->isThumb() &&
+ !ARM::GPRRegisterClass->contains(Reg))
+ continue;
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+ (*I)->setIsLandingPad(false);
+
// The instruction is gone now.
MI->eraseFromParent();
@@ -6222,20 +6480,28 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ EmitSjLjDispatchBlock(MI, BB);
+ return BB;
+
case ARM::ABS:
case ARM::t2ABS: {
// To insert an ABS instruction, we have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// source vreg to test against 0, the destination vreg to set,
// the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
+ // true/false values to select between, and a branch opcode to use.
// It transforms
// V1 = ABS V0
// into
// V2 = MOVS V0
// BCC (branch to SinkBB if V0 >= 0)
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
+ // SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator BBI = BB;
++BBI;
@@ -6276,19 +6542,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(ARM::CPSR, RegState::Define);
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
+ BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
// insert rsbri in RSBBB
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
.addReg(NewMovDstReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- // insert PHI in SinkBB,
+ // insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
@@ -6296,7 +6562,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NewMovDstReg).addMBB(BB);
// remove ABS instruction
- MI->eraseFromParent();
+ MI->eraseFromParent();
// return last added BB
return SinkBB;
@@ -6306,32 +6572,40 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
- // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+ // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
- MI->setDesc(TII->get(NewOpc));
+ MCID = &TII->get(NewOpc);
+
+ assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ "converted opcode should be the same except for cc_out");
+
+ MI->setDesc(*MCID);
+
+ // Add the optional cc_out operand
+ MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
- unsigned ccOutIdx = MCID.getNumOperands() - 1;
+ unsigned ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -6339,7 +6613,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
- for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
@@ -6513,7 +6787,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
default:
- assert(0 && "Invalid vector element type for padd optimization.");
+ llvm_unreachable("Invalid vector element type for padd optimization.");
}
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
@@ -6632,41 +6906,115 @@ static SDValue PerformMULCombine(SDNode *N,
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
+ int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
DebugLoc DL = N->getDebugLoc();
SDValue Res;
MulAmt >>= ShiftAmt;
- if (isPowerOf2_32(MulAmt - 1)) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- Res = DAG.getNode(ISD::ADD, DL, VT,
- V, DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt-1),
- MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt+1),
- MVT::i32)),
- V);
- } else
- return SDValue();
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32),Res);
+
+ } else
+ return SDValue();
+ }
if (ShiftAmt != 0)
- Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
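The shift-and-add rewrites above rest on simple wraparound identities. Here is a small standalone check (not part of the patch) of all four cases on a sample value; unsigned math models the 32-bit wraparound:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF;
      unsigned N = 3;
      // (mul x, 2^N + 1)    => (add (shl x, N), x)
      assert(x * ((1u << N) + 1) == (x << N) + x);
      // (mul x, 2^N - 1)    => (sub (shl x, N), x)
      assert(x * ((1u << N) - 1) == (x << N) - x);
      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
      assert(x * (0u - ((1u << N) - 1)) == x - (x << N));
      // (mul x, -(2^N + 1)) => (sub 0, (add (shl x, N), x))
      assert(x * (0u - ((1u << N) + 1)) == 0u - ((x << N) + x));
      return 0;
    }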
+static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
+ if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
+ return false;
+
+ SDValue FalseVal = N.getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
+ if (!C)
+ return false;
+ if (AllOnes)
+ return C->isAllOnesValue();
+ return C->isNullValue();
+}
+
+/// formConditionalOp - Combine an operation with a conditional move operand
+/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
+/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
+static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
+ bool Commutable) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ bool isAND = N->getOpcode() == ISD::AND;
+ bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
+ if (!isCand && Commutable) {
+ isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
+ if (isCand)
+ std::swap(N0, N1);
+ }
+ if (!isCand)
+ return SDValue();
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ISD::AND: Opc = ARMISD::CAND; break;
+ case ISD::OR: Opc = ARMISD::COR; break;
+ case ISD::XOR: Opc = ARMISD::CXOR; break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
+ N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
+ N1.getOperand(4));
+}
+
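To see why a CMOV whose false operand is the identity value can fold into a conditional op, note that the combined node only applies the operation when the condition holds. A quick standalone check (not part of the patch) of that equivalence:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      uint32_t x = 0x12345678, y = 0x0000FF00;
      for (bool cond : {false, true}) {
        // (or  x, (cmov  0, y, cond)) == cond ? (x | y) : x
        assert((x | (cond ? y : 0u)) == (cond ? (x | y) : x));
        // (and x, (cmov -1, y, cond)) == cond ? (x & y) : x
        assert((x & (cond ? y : ~0u)) == (cond ? (x & y) : x));
        // (xor x, (cmov  0, y, cond)) == cond ? (x ^ y) : x
        assert((x ^ (cond ? y : 0u)) == (cond ? (x ^ y) : x));
      }
      return 0;
    }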
static SDValue PerformANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
@@ -6697,6 +7045,13 @@ static SDValue PerformANDCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (and x, (cmov -1, y, cond)) => (and.cond x, y)
+ SDValue CAND = formConditionalOp(N, DAG, true);
+ if (CAND.getNode())
+ return CAND;
+ }
+
return SDValue();
}
@@ -6733,6 +7088,13 @@ static SDValue PerformORCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (or x, (cmov 0, y, cond)) => (or.cond x, y)
+ SDValue COR = formConditionalOp(N, DAG, true);
+ if (COR.getNode())
+ return COR;
+ }
+
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
@@ -6881,6 +7243,25 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformXORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (!Subtarget->isThumb1Only()) {
+ // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
+ SDValue CXOR = formConditionalOp(N, DAG, true);
+ if (CXOR.getNode())
+ return CXOR;
+ }
+
+ return SDValue();
+}
+
/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
@@ -6926,13 +7307,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(),
+ LD->isNonTemporal(), LD->isInvariant(),
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -6967,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile())
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ DebugLoc DL = St->getDebugLoc();
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
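The truncating-store rewrite above amounts to a byte gather followed by a narrower store. A host-side sketch (not part of the patch) for a <4 x i32> truncated to <4 x i8>, assuming a little-endian layout so the shuffle indices i*SizeRatio pick the low byte of each element:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t src[4] = {0x11223344, 0x55667788, 0x99AABBCC, 0xDDEEFF00};
      uint8_t wide[16];
      std::memcpy(wide, src, sizeof src);      // bitcast v4i32 -> v16i8
      uint8_t packed[4];
      for (unsigned i = 0; i != 4; ++i)
        packed[i] = wide[i * 4];               // shuffle lane i <- element i*SizeRatio
      for (unsigned i = 0; i != 4; ++i)
        assert(packed[i] == (uint8_t)src[i]);  // matches truncating each i32
      return 0;
    }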
+ if (!ISD::isNormalStore(St))
return SDValue();
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = St->getDebugLoc();
SDValue BasePtr = St->getBasePtr();
@@ -6996,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N,
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
DebugLoc dl = StVal.getDebugLoc();
SDValue IntVec = StVal.getOperand(0);
@@ -7177,7 +7645,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
- default: assert(0 && "unexpected intrinsic for Neon base update");
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
@@ -7210,7 +7678,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
} else {
isLaneOp = true;
switch (N->getOpcode()) {
- default: assert(0 && "unexpected opcode for Neon base update");
+ default: llvm_unreachable("unexpected opcode for Neon base update");
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
@@ -7703,6 +8171,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+ // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+    // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
+ SDValue N1 = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue N0 = N->getOperand(0);
+ if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+ DAG.MaskedValueIsZero(N0.getOperand(0),
+ APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
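The srl-to-rotr canonicalization relies on the rotated-in bits being zero whenever the high 16 bits of x are. A standalone check (not part of the patch); bswap32 and rotr32 are local stand-ins for the ISD nodes:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t v) {
      return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
             ((v & 0x00FF0000u) >> 8)  | ((v & 0xFF000000u) >> 24);
    }
    static uint32_t rotr32(uint32_t v, unsigned n) { return (v >> n) | (v << (32 - n)); }

    int main() {
      uint32_t x = 0x0000ABCD;   // high 16 bits known to be zero
      assert((bswap32(x) >> 16) == rotr32(bswap32(x), 16));
      return 0;
    }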
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7824,7 +8304,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
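For reference, the signed-zero hazard the UnsafeFPMath guard protects against is easy to see on the host: the select keeps +0.0 while a vmin-style minimum would hand back the -0.0 operand. A small check (not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      float a = 0.0f, b = -0.0f;
      float sel = (a <= b) ? a : b;               // the select being matched
      assert(sel == 0.0f && !std::signbit(sel));  // select keeps +0.0
      assert(std::signbit(b));                    // vmin would return this -0.0
      return 0;
    }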
@@ -7846,7 +8326,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
@@ -7906,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -7931,7 +8410,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
- case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -8001,6 +8481,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
}
}
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if (IsZeroVal &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+ return MVT::v4i32;
+ } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+ return MVT::v2i32;
+ }
+ }
+
+ // Lowering to i32/i16 if the size permits.
+ if (Size >= 4) {
+ return MVT::i32;
+ } else if (Size >= 2) {
+ return MVT::i16;
+ }
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
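The selection policy in getOptimalMemOpType can be read as a small decision function. A host-side sketch (not part of the patch); the return values are just labels standing in for the EVTs, and pickMemOpType is an illustrative name:

    #include <cassert>
    #include <cstdint>
    #include <string>

    static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) {
      return (SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
             (DstAlign == 0 || DstAlign % AlignCheck == 0);
    }

    static std::string pickMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                                     bool IsZeroVal, bool HasNEON, bool NoImplicitFloat) {
      if (IsZeroVal && !NoImplicitFloat && HasNEON) {
        if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) return "v4i32";
        if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8)   return "v2i32";
      }
      if (Size >= 4) return "i32";
      if (Size >= 2) return "i16";
      return "Other";   // fall back to target-independent lowering
    }

    int main() {
      // A 16-byte-aligned memset of 64 bytes on a NEON target uses wide vector stores.
      assert(pickMemOpType(64, 16, 0, true, true, false) == "v4i32");
      // Misaligned or small operations degrade to integer types.
      assert(pickMemOpType(6, 1, 1, false, true, false) == "i32");
      assert(pickMemOpType(2, 1, 1, false, true, false) == "i16");
      return 0;
    }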
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -8188,7 +8703,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
- break;
}
return true;
}
@@ -8198,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(Imm) != -1;
+ return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
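The abs64 change works because comparing against a negative constant and cmn against its absolute value drive the flags from the same sum; a trivial standalone check (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = 42, C = -100;
      // cmp x, #C sets flags from x - C; cmn x, #|C| sets them from x + |C|.
      assert(x - C == x + (-C));
      return 0;
    }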
@@ -8388,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
}
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
- KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5da9b27fca68..352d98001ddc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
+#include "ARM.h"
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -56,7 +57,11 @@ namespace llvm {
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
+
CMOV, // ARM conditional move instructions.
+ CAND, // ARM conditional and instructions.
+ COR, // ARM conditional or instructions.
+ CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -81,7 +86,6 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -146,6 +150,9 @@ namespace llvm {
VMOVIMM,
VMVNIMM,
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
// Vector duplicate:
VDUP,
VDUPLANE,
@@ -266,9 +273,14 @@ namespace llvm {
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type.
- /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -303,7 +315,6 @@ namespace llvm {
SelectionDAG &DAG) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -338,7 +349,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -402,7 +413,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -424,6 +434,8 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
@@ -452,7 +464,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -481,7 +493,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -512,9 +524,6 @@ namespace llvm {
bool signExtend,
ARMCC::CondCodes Cond) const;
- void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7cbc9111dec3..1d38bcf9e843 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,4 +1,4 @@
-//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : AsmOperandClass {
+def SetEndAsmOperand : ImmAsmOperand {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -274,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
class Encoding {
field bits<32> Inst;
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
}
class InstARM<AddrMode am, int sz, IndexMode im,
@@ -290,6 +314,32 @@ class InstThumb<AddrMode am, int sz, IndexMode im,
let DecoderNamespace = "Thumb";
}
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<string asm, dag iops>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "", NoItinerary> {
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let isCodeGenOnly = 0; // So we get asm matcher for it.
+ let AsmString = asm;
+ let isPseudo = 1;
+}
+
+class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsARM]>;
+class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb]>;
+class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb2]>;
+class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasVFP2]>;
+class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasNEON]>;
+
+// Pseudo instructions for the code generator.
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
: InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
GenericDomain, "", itin> {
@@ -481,6 +531,8 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Inst{15-12} = Rt;
let Inst{11-4} = 0b00001001;
let Inst{3-0} = Rt2;
+
+ let DecoderMethod = "DecodeSwap";
}
// addrmode1 instructions
@@ -792,7 +844,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : AsmOperandClass {
+def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1550,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
: AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ bits<5> fbits;
// size (fixed-point number): sx == 0 ? 16 : 32
let Inst{7} = op5; // sx
+ let Inst{5} = fbits{0};
+ let Inst{3-0} = fbits{4-1};
}
// VFP conversion instructions, if no NEON
@@ -1963,3 +2018,54 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasNEON,UseNEONForFP];
}
+
+// VFP/NEON Instruction aliases for type suffices.
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+ let Predicates = [HasNEON] in {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 48da03f63bb9..b8f607eb4c55 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,12 +21,29 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ if (hasNOP()) {
+ NopInst.setOpcode(ARM::NOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ } else {
+ NopInst.setOpcode(ARM::MOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ }
+}
+
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index f2c7bdc31be9..5d3e059b7038 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,10 @@
#ifndef ARMINSTRUCTIONINFO_H
#define ARMINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
class ARMSubtarget;
@@ -28,6 +27,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo {
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2cf0f09ffc64..3caaa2366123 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
@@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
- SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
@@ -184,6 +179,8 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
@@ -211,16 +208,20 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"ModeNaCl">;
+def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
+def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed.
+def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -244,25 +245,28 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
-def so_imm_not :
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use so_imm.
+def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM>;
+ }], so_imm_not_XFORM> {
+ let ParserMatchClass = so_imm_not_asmoperand;
+}
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
@@ -279,14 +283,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 65536;
-}]> {
- let ParserMatchClass = Imm0_65535AsmOperand;
-}
-
class BinOpWithFlagFrag<dag res> :
PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
@@ -321,6 +317,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Operand Definitions.
//
+// Immediate operands with a shared generic asm render method.
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -352,13 +351,11 @@ def bltarget : Operand<i32> {
// Call target for ARM. Handles conditional/unconditional
// FIXME: rename bl_target to t2_bltarget?
def bl_target : Operand<i32> {
- // Encoded the same as branch targets.
- let EncoderMethod = "getARMBranchTargetOpValue";
+ let EncoderMethod = "getARMBLTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
def blx_target : Operand<i32> {
- // Encoded the same as branch targets.
let EncoderMethod = "getARMBLXTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
@@ -475,6 +472,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegRegOpValue";
let PrintMethod = "printSORegRegOperand";
let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
@@ -485,13 +483,14 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegImmOpValue";
let PrintMethod = "printSORegImmOperand";
let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
-def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
+def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
@@ -515,16 +514,60 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -532,33 +575,57 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
let ParserMatchClass = Imm0_31AsmOperand;
}
+/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
+def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_32AsmOperand;
+}
+
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
+/// imm0_65535 - An immediate in the range [0,65535].
+def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -572,6 +639,7 @@ def BitfieldAsmOperand : AsmOperandClass {
let Name = "Bitfield";
let ParserMethod = "parseBitfield";
}
+
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -670,7 +738,7 @@ def postidx_reg : Operand<i32> {
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
let ParserMatchClass = PostIdxRegAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -699,7 +767,7 @@ def am2offset_reg : Operand<i32>,
let PrintMethod = "printAddrMode2OffsetOperand";
// When using this for assembly, it's always as a post-index offset.
let ParserMatchClass = PostIdxRegShiftedAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
@@ -711,7 +779,7 @@ def am2offset_imm : Operand<i32>,
let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
let ParserMatchClass = AM2OffsetImmAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -799,6 +867,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -1041,69 +1112,58 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
-/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default.
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
///
/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
-
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]>;
-
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
-
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-8} = shift{11-8};
- let Inst{7} = 0;
- let Inst{6-5} = shift{6-5};
- let Inst{4} = 1;
- let Inst{3-0} = shift{3-0};
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, iir,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ let isCommutable = Commutable;
}
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_imm:$shift))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_reg:$shift))]>;
}
}
-/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
+ GPR:$Rn))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
+ GPR:$Rn))]>;
}
}
@@ -1272,10 +1332,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{4} = 0;
let Inst{3-0} = shift{3-0};
}
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift),
+ def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -1309,7 +1369,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn,
so_reg_reg:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
@@ -1550,7 +1610,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These psuedos use a hand-written selection code).
+// (These pseudos use hand-written selection code).
let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
@@ -1652,7 +1712,7 @@ class CPS<dag iops, string asm_ops>
let Inst{27-20} = 0b00010000;
let Inst{19-18} = imod;
let Inst{17} = M; // Enabled if mode is set;
- let Inst{16} = 0;
+ let Inst{16-9} = 0b00000000;
let Inst{8-6} = iflags;
let Inst{5} = 0;
let Inst{4-0} = mode;
@@ -1839,20 +1899,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
-// All calls clobber the non-callee saved registers. SP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
// FIXME: Do we really need a non-predicated version? If so, it should
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
@@ -1862,7 +1919,7 @@ let isCall = 1,
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
bits<24> func;
let Inst{23-0} = func;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -1872,7 +1929,7 @@ let isCall = 1,
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
@@ -1881,7 +1938,7 @@ let isCall = 1,
def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
@@ -1891,55 +1948,19 @@ let isCall = 1,
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsNotDarwin]>;
+ Requires<[IsARM, HasV4T]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
- Requires<[IsARM, IsDarwin]>;
-
- def BLr9_pred : ARMPseudoExpand<(outs),
- (ins bl_target:$func, pred:$p, variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred tglobaladdr:$func)],
- (BL_pred bl_target:$func, pred:$p)>,
- Requires<[IsARM, IsDarwin]>;
-
- // ARMv5T and above
- def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall GPR:$func)],
- (BLX GPR:$func)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
-
- def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred GPR:$func)],
- (BLX_pred GPR:$func, pred:$p)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
+ Requires<[IsARM, NoV4T]>;
- // ARMv4T
- // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsDarwin]>;
-
- // ARMv4
- def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsDarwin]>;
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def BMOVPCB_CALL : ARMPseudoInst<(outs),
+ (ins bl_target:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsARM]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -2006,47 +2027,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsDarwin]>;
-
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsDarwin]>;
-
- }
-
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>;
- def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>;
- def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsNotDarwin]>;
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>;
- def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsNotDarwin]>;
- }
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM]>;
}
// Secure Monitor Call is a system instruction.
@@ -2145,7 +2141,7 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
}
//===----------------------------------------------------------------------===//
-// Load / store Instructions.
+// Load / Store Instructions.
//
// Load
@@ -2197,9 +2193,10 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
}
// Indexed loads
-multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_ldridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2211,7 +2208,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2225,7 +2222,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2242,7 +2239,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2260,8 +2257,10 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
-defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+// FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether the offset register is shifted.
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
}
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
@@ -2416,7 +2415,7 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{3-0} = offset{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackImm";
}
- def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
(ins addr_offset_none:$addr, postidx_reg:$Rm),
IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
"\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
@@ -2424,8 +2423,10 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{23} = Rm{4};
let Inst{22} = 0;
let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
let Inst{3-0} = Rm{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ let DecoderMethod = "DecodeLDR";
}
}
@@ -2451,10 +2452,11 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
}
// Indexed stores
-multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_stridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
- StFrm, itin,
+ StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2467,7 +2469,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, ldst_so_reg:$addr),
- IndexModePre, StFrm, itin,
+ IndexModePre, StFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2480,7 +2482,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2497,7 +2499,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2514,8 +2516,10 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayStore = 1, neverHasSideEffects = 1 in {
-defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
-defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
}
def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
@@ -2745,23 +2749,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here. The form without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2770,16 +2776,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2788,16 +2796,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2806,16 +2816,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2826,10 +2838,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2843,6 +2857,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -2860,7 +2884,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
// A version for the smaller set of tail call registers.
@@ -3080,20 +3104,18 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// ADD and SUB with 's' bit set.
//
-// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
-// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
// AdjustInstrPostInstrSelection where we determine whether or not to
// set the "s" bit based on CPSR liveness.
//
-// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm ADDS : AsI1_bin_s_irs<0b0100, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3108,9 +3130,8 @@ defm RSB : AsI1_rbin_irs <0b0011, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3153,6 +3174,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
}
// Saturating add/subtract
@@ -3445,19 +3468,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// property. Remove them when it's possible to add those properties
// on an individual MachineInstr, not just an instruction description.
let isCommutable = 1 in {
-def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
+ let Unpredictable{15-12} = 0b1111;
}
let Constraints = "@earlyclobber $Rd" in
-def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
pred:$p, cc_out:$s),
4, IIC_iMUL32,
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
- (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
}
@@ -3952,10 +3976,13 @@ def BCCZi64 : PseudoInst<(outs),
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg_imm:$shift, pred:$p),
4, IIC_iCMOVsr,
@@ -3996,8 +4023,44 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
+// Conditional instructions
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+ Instruction irsr,
+ InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis> {
+ def ri : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rr : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+
} // neverHasSideEffects
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -4076,10 +4139,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -4106,10 +4169,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -4136,10 +4199,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -4185,14 +4248,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
(ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+}
+
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
Requires<[IsARM, HasV7]> {
@@ -4451,10 +4514,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4488,10 +4557,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
@@ -4635,7 +4710,8 @@ let isCall = 1,
// no encoding information is necessary.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
@@ -4644,31 +4720,37 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsDarwin]>;
+ Requires<[IsARM, IsIOS]>;
}
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// eh.sjlj.dispatchsetup pseudo-instructions.
+// These pseudos are used for both ARM and Thumb2. Any differences are
// handled when the pseudo is expanded (which happens before any passes
// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
- [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
- Requires<[IsDarwin]>;
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4725,30 +4807,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+// Tail calls. These patterns also apply to Thumb mode.
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>;
+def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
+def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
// Direct calls
-def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
- Requires<[IsARM, IsNotDarwin]>;
-def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
- Requires<[IsARM, IsDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+def : ARMPat<(ARMcall_nolink texternalsym:$func),
+ (BMOVPCB_CALL texternalsym:$func)>;
// zextload i1 -> zextload i8
def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
@@ -4992,13 +5059,113 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">;
// USAX == USUBADDX
def : MnemonicAlias<"usubaddx", "usax">;
-// LDRSBT/LDRHT/LDRSHT post-index offset if optional.
-// Note that the write-back output register is a dummy operand for MC (it's
-// only meaningful for codegen), so we just pass zero here.
-// FIXME: tblgen not cooperating with argument conversions.
-//def : InstAlias<"ldrsbt${p} $Rt, $addr",
-// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>;
-//def : InstAlias<"ldrht${p} $Rt, $addr",
-// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
-//def : InstAlias<"ldrsht${p} $Rt, $addr",
-// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+ (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+
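// Illustrative examples (not additional definitions): with the aliases above,
// an immediate that only encodes in the complementary form is still accepted,
// e.g. "mov r0, #0xffffff00" matches as "mvn r0, #0xff", "add r0, r1, #-16"
// as "sub r0, r1, #16", and "cmp r0, #-1" as "cmn r0, #1".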
+// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
+// LSR, ROR, and RRX instructions.
+// FIXME: We need C++ parser hooks to map the alias to the MOV
+// encoding. It seems we should be able to do that sort of thing
+// in tblgen, but it could get ugly.
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+// Shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+ (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+ (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+ (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+ (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+ (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+ (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+ (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+ (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+
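// Illustrative examples (not additional definitions): "lsl r0, #2" is the
// two-operand form of "lsl r0, r0, #2", which in turn corresponds to the
// MOV-with-shift encoding referred to in the FIXME above
// (i.e. "mov r0, r0, lsl #2").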
+
+// 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need different constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7aad18695bb3..c7219a60f6c3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,4 +1,4 @@
-//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,45 @@
//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
+def nImmSplatI8 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI8AsmOperand;
+}
+def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
+def nImmSplatI16 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI16AsmOperand;
+}
+def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
+def nImmSplatI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI32AsmOperand;
+}
+def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
+def nImmVMOVI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperand;
+}
+def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
+def nImmVMOVI32Neg : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32NegAsmOperand;
+}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
+def nImmSplatI64 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI64AsmOperand;
+}
+
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
@@ -40,6 +79,326 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
+def VecListOneDAsmOperand : AsmOperandClass {
+ let Name = "VecListOneD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
+ let ParserMatchClass = VecListOneDAsmOperand;
+}
+// Register list of two sequential D registers.
+def VecListDPairAsmOperand : AsmOperandClass {
+ let Name = "VecListDPair";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+ let ParserMatchClass = VecListDPairAsmOperand;
+}
+// Register list of three sequential D registers.
+def VecListThreeDAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+ let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+ let Name = "VecListFourD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+ let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpaced";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+ let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+ let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+ let ParserMatchClass = VecListFourQAsmOperand;
+}
+
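// Illustrative examples (not additional definitions) of the register-list
// syntax these operand classes parse: VecListOneD matches "{d0}", VecListDPair
// "{d0, d1}", VecListDPairSpaced "{d0, d2}", and VecListFourQ
// "{d0, d2, d4, d6}", as used by the NEON vector load/store instructions.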
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoSpacedAllLanes"> {
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeAllLanes"> {
+ let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeSpacedAllLanes"> {
+ let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+ let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q regs).
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+ "printVectorListFourSpacedAllLanes"> {
+ let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
+
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of two D registers with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two Q registers with half-word lane subscripting.
+def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
+// Register list of three D registers with byte lane subscripting.
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of three Q registers with half-word lane subscripting.
+def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
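(Illustration: the lane-indexed list operands defined above are intended to match assembly operands of roughly this shape, with the lane carried in the separate i32imm:$idx operand; sketch only, not taken from the patch.)
    vld1.8  {d0[3]}, [r0]           @ one D register, byte lane
    vld2.16 {d0[1], d1[1]}, [r0]    @ two adjacent D registers, half-word lane
    vld2.32 {d0[1], d2[1]}, [r0]    @ two D registers spaced by 2, word lane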
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -103,6 +462,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -164,30 +524,22 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
}]>;
//===----------------------------------------------------------------------===//
-// NEON operand definitions
-//===----------------------------------------------------------------------===//
-
-def nModImm : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
-}
-
-//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
- : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
- : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+ [(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -197,12 +549,31 @@ class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -215,17 +586,17 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1,
- "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
@@ -241,59 +612,82 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
-def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
-
// ...with address register writeback:
-class VLD1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
- "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
-def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
-def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
-def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
-
-def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
-def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
-def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
-def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
-
-def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
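(Illustration: the _fixed and _register variants produced by these multiclasses correspond to the two post-indexed writeback syntaxes; the fixed form hard-codes Rm = 0b1101 as noted in the comments above. Sketch only, not from the patch.)
    vld1.8  {d0}, [r0]!        @ fixed: post-increment by the transfer size
    vld1.8  {d0}, [r0], r2     @ register: post-increment by r2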
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
@@ -301,31 +695,40 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
-def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
-def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
-def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
-def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
- []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -333,91 +736,80 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
-def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$Rn), IIC_VLD2x2,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
-
-def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
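(Illustration: the pairing difference between the two VLD2 operand classes shows up directly in the register list; sketch only, not from the patch.)
    vld2.16 {d0, d1}, [r0]     @ VecListDPair: adjacent D registers
    vld2.16 {d0, d2}, [r0]     @ VecListDPairSpaced: double-spaced D registers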
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -601,12 +993,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
let Inst{7} = lane{0};
- let Inst{5} = Rn{4};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
@@ -776,7 +1167,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
@@ -787,7 +1178,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
@@ -802,7 +1193,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
- let Inst{4} = Rn{4};
+ let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
}
@@ -813,7 +1204,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -826,7 +1217,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -854,7 +1245,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -866,7 +1257,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -877,117 +1268,142 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
- let Pattern = [(set QPR:$dst,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
-def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
- (VLD1DUPq32Pseudo addrmode6:$addr)>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
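(Illustrative all-lanes syntax for the duplicating loads above; sketch only, not from the patch.)
    vld1.32 {d0[]}, [r0]        @ VecListOneDAllLanes: replicate into one D register
    vld1.32 {d0[], d1[]}, [r0]  @ VecListDPairAllLanes: replicate across a D pair (q0)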
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
-
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD2dup,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
-
-def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
-class VLD2DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD2DupInstruction";
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+ def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
-def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
-def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
-
-def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
-def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
-def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
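(Sketch of the double-spaced all-lanes form with fixed writeback, for illustration only.)
    vld2.8  {d0[], d2[]}, [r0]!  @ spaced all-lanes VLD2DUP, post-increment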
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
@@ -1008,9 +1424,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
-def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
-def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
@@ -1026,9 +1442,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
-def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
-def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
-def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
@@ -1054,9 +1470,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
-def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
-def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
@@ -1073,9 +1489,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
-def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
-def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
@@ -1093,12 +1509,29 @@ class VSTQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+
class VSTQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
@@ -1108,16 +1541,15 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
- IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b1010,op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
- "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
@@ -1133,185 +1565,233 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
-def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
-
// ...with address register writeback:
-class VST1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
- "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
-def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
-def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
-def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
-
-def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
-def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
-def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
-def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
-def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
-
-def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1458,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
- : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
- IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
- let Rm = 0b1111;
- let DecoderMethod = "DecodeVST1LN";
-}
-class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
@@ -1482,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+ addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1507,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
+ (ins AdrMode:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- addrmode6:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
@@ -1524,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
- extractelt> {
+ extractelt, addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1585,10 +2057,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -1914,8 +2386,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
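(Illustrative by-scalar syntax for the "$Vm$lane" forms above; the lane ranges are assumed from VectorIndex32/VectorIndex16, sketch only.)
    vmul.f32 d0, d1, d2[1]     @ 32-bit element, lane selected via VectorIndex32
    vmla.i16 d0, d1, d2[3]     @ 16-bit element, lane selected via VectorIndex16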
@@ -1924,8 +2396,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -1954,8 +2426,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -1965,8 +2437,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -1987,8 +2459,8 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
@@ -1998,8 +2470,8 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -2028,8 +2500,8 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2040,8 +2512,8 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2073,9 +2545,9 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2086,9 +2558,9 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2108,9 +2580,9 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2122,9 +2594,9 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2182,9 +2654,9 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2194,9 +2666,9 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2230,9 +2702,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2243,9 +2715,9 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2277,8 +2749,8 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
@@ -2286,8 +2758,8 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
@@ -2332,8 +2804,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2342,8 +2814,8 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2417,9 +2889,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2649,14 +3121,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3165,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
+ string baseOpc, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
@@ -3199,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
+
+ // Aliases for two-operand forms (source and dest regs the same).
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
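+  // For example, with baseOpc "VSHRs" and Dt "s", the first alias lets
+  // "vshr${p}.s8 $Vdn, $imm" assemble as VSHRsv8i8 $Vdn, $Vdn, $imm, with the
+  // destination register doubling as the first source operand.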
}
// Neon Shift-Accumulate vector operations,
@@ -3321,15 +3817,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3418,7 +3914,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -3509,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -3567,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -3619,6 +4115,37 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
+ (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
+ (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
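+// An fma whose first multiply operand is wrapped in fneg selects the fused
+// multiply-subtract form instead.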
+def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
@@ -3741,7 +4268,7 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3750,7 +4277,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
}
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3759,7 +4286,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
}
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3768,7 +4295,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
}
def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3790,7 +4317,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(vnotq QPR:$Vm))))]>;
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3799,7 +4326,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
}
def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3808,7 +4335,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
}
def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3817,7 +4344,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
}
def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3842,28 +4369,28 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
@@ -3912,12 +4439,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
@@ -3926,12 +4453,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
@@ -4119,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
-defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+ NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+ NEONvshru>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -4129,18 +4658,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4154,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
-defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+ NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+ NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -4298,13 +4829,15 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
-// Vector Swap -- for disassembly only.
+// Vector Swap
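+// Each register is both read and written, so the inputs are tied to the
+// corresponding outputs.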
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
- (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
// Vector Move Operations.
@@ -4318,89 +4851,98 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4442,24 +4984,24 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4627,6 +5169,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
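+// anyext leaves the high bits of each widened lane unspecified, so the
+// zero-extending VMOVLu forms are used to match it as well.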
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
// Vector Conversions.
@@ -4650,6 +5195,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4658,7 +5204,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
+let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4667,6 +5215,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -4759,34 +5308,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4795,17 +5344,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -4825,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
@@ -4835,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
@@ -4847,27 +5404,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
- "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+ (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBL2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def VTBL3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
@@ -4876,31 +5431,28 @@ def VTBL4Pseudo
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
- "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
[(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
- DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+ DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBX2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
- IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX3, "$orig = $dst", []>;
@@ -4950,9 +5502,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
@@ -5028,3 +5584,1448 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)),
+// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+//     Pat<(v4i16 (extloadvi8 addrmode5:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
+//   Pat<(v4i32 (extloadvi8 addrmode5:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)),
+// qsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, SubRegIndex RegType> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
+// v2i16 -> v2i32 -> v2i64
+defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD4 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+
+// VLD4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix.
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMAX.
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMIN.
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VPADD.
+def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
+ (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
+ (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
+ (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
+ (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VSRA.
+// Signed.
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Unsigned.
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSRI.
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSLI.
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// VSWP allows, but does not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+
+// "vmov Rd, #-imm" can be handled via "vmvn".
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should be restricted to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
+
+def : NEONMnemonicAlias<"vzipq", "vzip">;
+def : NEONMnemonicAlias<"vswpq", "vswp">;
+
+def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
+def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
+
+
+// Alias for loading floating point immediates that aren't representable
+// using the vmov.f32 encoding but whose bit pattern is representable using
+// the .i32 encoding.
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
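
As an illustrative sketch (register choices and immediates are arbitrary, assuming GNU
unified syntax), the two-operand aliases and asm pseudo-instructions above accept forms
such as the following, which the assembler rewrites onto the underlying instructions:

    vqadd.s8  d0, d1        @ two-operand alias; matched as vqadd.s8 d0, d0, d1 (VQADDsv8i8)
    vshr.u32  q2, #3        @ two-operand alias; matched as vshr.u32 q2, q2, #3 (VSHRuv4i32)
    vclt.f32  d4, d5, d6    @ operands swapped; encoded as vcgt.f32 d4, d6, d5 (VCGTfd)
    vmovq     q1, q2        @ optional datatype/q suffix; emitted as vorr q1, q2, q2 (VORRq)
    vld3.8    {d0[1], d1[1], d2[1]}, [r0]!   @ lane-indexed list + writeback; handled by the
                                             @ VLD3LN*_Asm pseudo-instructions above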
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index cedb54799db0..6335229d3c2a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> {
let ParserMatchClass = t_imm0_508s4_asmoperand;
let OperandType = "OPERAND_IMMEDIATE";
}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+ let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
// Define Thumb specific addressing modes.
@@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
+def : tInstAlias<"add${p} sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+
// Can optionally specify SP as a three operand instruction.
def : tInstAlias<"add${p} sp, sp, $imm",
(tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
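
An illustrative sketch of the assembly the new negative-immediate aliases above accept
(values arbitrary, assuming the usual Thumb1 SP-relative forms):

    add  sp, sp, #-16   @ matched via t_imm0_508s4_neg; encoded as tSUBspi (sub sp, sp, #16)
    add  sp, #-16       @ two-operand form of the same alias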
@@ -387,6 +398,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
bits<4> Rm;
let Inst{6-3} = Rm;
let Inst{2-0} = 0b000;
+ let Unpredictable{2-0} = 0b111;
}
}
@@ -404,15 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// prevent stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsNotDarwin]> {
+ Requires<[IsThumb]> {
bits<22> func;
let Inst{26} = func{21};
let Inst{25-16} = func{20-11};
@@ -426,7 +436,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ Requires<[IsThumb, HasV5T]> {
bits<21> func;
let Inst{25-16} = func{20-11};
let Inst{13} = 1;
@@ -439,7 +449,7 @@ let isCall = 1,
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
@@ -450,38 +460,7 @@ let isCall = 1,
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- // Also used for Thumb2
- def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
- 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
- (tBL pred:$p, t_bltarget:$func)>,
- Requires<[IsThumb, IsDarwin]>;
-
- // ARMv5T and above, also used for Thumb2
- def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
- 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
- (tBLXi pred:$p, t_blxtarget:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // Also used for Thumb2
- def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
- 2, IIC_Br, [(ARMtcall GPR:$func)],
- (tBLXr pred:$p, GPR:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // ARMv4T
- def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+ Requires<[IsThumb, IsThumb1Only]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
@@ -523,28 +502,22 @@ let isBranch = 1, isTerminator = 1 in
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
- // on Darwin), so it's in ARMInstrThumb2.td.
+ // IOS versions.
+ let Uses = [SP] in {
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
}
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
+ // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
+ // on IOS), so it's in ARMInstrThumb2.td.
+ // Non-IOS version:
+ let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
(ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
- Requires<[IsThumb, IsNotDarwin]>;
- def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsNotDarwin]>;
+ Requires<[IsThumb, IsNotIOS]>;
}
}
@@ -652,7 +625,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
}
// Load tconstpool
-// FIXME: Use ldr.n to work around a Darwin assembler bug.
+// FIXME: Use ldr.n to work around a darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", ".n\t$Rt, $addr",
@@ -666,10 +639,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
}
// FIXME: Remove this entry when the above ldr.n workaround is fixed.
-// For disassembly use only.
-def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [/* disassembly only */]>,
+// For assembly/disassembly use only.
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr", []>,
T1Encoding<{0,1,0,0,1,?}> {
// A6.2 & A8.6.59
bits<3> Rt;
@@ -1131,9 +1103,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1259,19 +1228,24 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb, IsIOS]>;
+
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ isBarrier = 1 in
+def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1309,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Direct calls
def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
- Requires<[IsThumb, IsNotDarwin]>;
-def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
@@ -1434,3 +1402,16 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm",
+ (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>;
+def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
+ (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+def : tInstAlias<"asr${s}${p} $Rdm, $imm",
+ (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
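
As an illustration of the implied-destination shift aliases just above (a toy textual
expansion, not the generated matcher): the two-operand spelling simply reuses the
destination register as the first source.

#include <string>

// "lsls r1, #2" behaves exactly like "lsls r1, r1, #2".
static std::string expandTwoOperandShift(const std::string &Mnemonic,
                                         const std::string &Rdm,
                                         const std::string &Imm) {
  return Mnemonic + " " + Rdm + ", " + Rdm + ", " + Imm;
}
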
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 05dcc8993969..e6fb9d5f01eb 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -76,26 +76,39 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use t2_so_imm.
+def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_not_XFORM>;
+}], t2_so_imm_not_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
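
To make the two operand classes above concrete, here is a standalone, illustrative
re-implementation of the "Thumb-2 modified immediate" test that ARM_AM::getT2SOImmVal
performs (this sketch is not the LLVM code): a value is encodable if it is one of the
four replicated 8-bit patterns, or an 8-bit value with its top bit set rotated right by
8 to 31 bits. t2_so_imm_not and t2_so_imm_neg then accept any constant whose complement
or negation passes this test, which is what lets the assembler and isel flip mov/mvn,
and/bic, add/sub, and cmp/cmn.

#include <cstdint>

// Illustrative only; LLVM uses ARM_AM::getT2SOImmVal() for this.
static bool isT2SOImm(uint32_t Imm) {
  uint32_t B0 = Imm & 0xFF, B1 = (Imm >> 8) & 0xFF;
  if (Imm == B0)                          return true;   // 0x000000XY
  if (Imm == (B0 | (B0 << 16)))           return true;   // 0x00XY00XY
  if (Imm == ((B1 << 8) | (B1 << 24)))    return true;   // 0xXY00XY00
  if (Imm == B0 * 0x01010101u)            return true;   // 0xXYXYXYXY
  for (unsigned Rot = 8; Rot < 32; ++Rot) {              // "1bcdefgh" ror Rot
    uint32_t V = (Imm << Rot) | (Imm >> (32 - Rot));     // undo the rotation
    if ((V & ~0xFFu) == 0 && (V & 0x80))  return true;
  }
  return false;
}

// Example: 0xFFFFFF0F is not encodable, but its complement 0x000000F0 is, so
// "mov r0, #0xFFFFFF0F" can be matched via t2_so_imm_not as "mvn r0, #0xF0".
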
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
-def imm0_4095 : Operand<i32>,
- ImmLeaf<i32, [{
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
-}]>;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
@@ -129,6 +142,12 @@ def t2ldrlabel : Operand<i32> {
let PrintMethod = "printT2LdrLabelOperand";
}
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+  // Used only for an assembler pseudo-instruction that maps to t2ldrlabel,
+  // so it doesn't need encoder or print methods of its own.
+}
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
@@ -545,6 +564,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
@@ -556,6 +580,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
@@ -608,25 +636,48 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_imm:$imm))]>;
// register
- def rr : T2sThreeReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
+ 4, iir,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
// shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, iis,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<PatFrag opnode> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
+ GPRnopc:$Rn))]>;
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
+ GPRnopc:$Rn))]>;
}
}
@@ -735,26 +786,6 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
-/// version is not needed since this is only for codegen.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
- // shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
- // shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
-}
-}
-
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
@@ -930,7 +961,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let DecoderMethod = "DecodeT2LoadShift";
}
- // FIXME: Is the pci variant actually needed?
+ // pci variant is very similar to i12, but supports negative offsets
+ // from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
[(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
@@ -1315,14 +1347,16 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
+
+let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1343,15 +1377,16 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
}
+} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1398,7 +1433,6 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
}
-
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
@@ -1455,7 +1489,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
"$addr.base = $wb", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
-// data/instruction access. These are for disassembly only.
+// data/instruction access.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
@@ -1513,6 +1547,10 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadShift";
}
+ // FIXME: We should have a separate 'pci' variant here. As-is we represent
+ // it via the i12 variant, which it's related to, but that means we can
+ // represent negative immediates, which aren't legal for anything except
+ // the 'pci' case (Rn == 15).
}
defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
@@ -1689,6 +1727,8 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, zero_reg)>;
def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
pred:$p, CPSR)>;
def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
@@ -1837,11 +1877,9 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
let hasPostISelHook = 1 in {
@@ -1857,8 +1895,7 @@ defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which define
// CPSR and the implicit def of CPSR is not needed.
-defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -2840,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@@ -2883,7 +2922,7 @@ def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
let isMoveImm = 1 in
def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+ IIC_iCMOVi, "mvn", "\t$Rd, $imm",
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd"> {
@@ -2922,6 +2961,35 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
} // isCodeGenOnly = 1
+
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
+ // shifted imm
+ def ri : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // register
+ def rr : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // shifted register
+ def rs : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+} // T2I_bincc_irs
+
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3043,9 +3111,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{11-8} = Rd;
let Inst{7-0} = addr{7-0};
}
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
@@ -3054,6 +3120,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
Requires<[IsThumb2, HasV7]> {
@@ -3081,8 +3148,9 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3091,7 +3159,8 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3128,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{13} = target{17};
let Inst{21-16} = target{16-11};
let Inst{10-0} = target{10-0};
+ let DecoderMethod = "DecodeT2BInstruction";
}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
@@ -3195,19 +3265,32 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let DecoderMethod = "DecodeThumb2BCCInstruction";
}
-// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
// it goes here.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin version.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in
+ // IOS version.
+ let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
- Requires<[IsThumb2, IsDarwin]>;
+ Requires<[IsThumb2, IsIOS]>;
}
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def t2BMOVPCB_CALL : tPseudoInst<(outs),
+ (ins t_bltarget:$func, variable_ops),
+ 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsThumb]>;
+}
+
+// Direct calls
+def : T2Pat<(ARMcall_nolink texternalsym:$func),
+ (t2BMOVPCB_CALL texternalsym:$func)>,
+ Requires<[IsThumb]>;
+
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
@@ -3430,7 +3513,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo isntruction that combines movs + predicated rsbmi
+// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
let usesCustomInserter = 1, Defs = [CPSR] in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
@@ -3667,20 +3750,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3851,6 +3946,29 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
(t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
+ (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
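
A hedged sketch of the selection these "add with negative immediate" aliases express
(toy code, not the matcher TableGen generates): when the immediate fits neither the
modified-immediate nor the 12-bit addw form but its negation does, the corresponding
SUB opcode is used instead. isT2SOImm refers to the illustrative helper sketched
earlier in this note, and the immediate is assumed to be a 32-bit constant.

#include <cstdint>
#include <string>

bool isT2SOImm(uint32_t Imm);  // illustrative helper sketched earlier

// Toy opcode choice for "add rd, rn, #Imm".
static std::string selectAddOpcode(int64_t Imm) {
  if (isT2SOImm((uint32_t)Imm))    return "t2ADDri";    // add.w
  if (Imm >= 0 && Imm < 4096)      return "t2ADDri12";  // addw
  if (isT2SOImm((uint32_t)-Imm))   return "t2SUBri";    // add #-x -> sub #x
  if (-Imm >= 0 && -Imm < 4096)    return "t2SUBri12";  // addw #-x -> subw #x
  return "<needs more than one instruction>";
}
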
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
@@ -3862,6 +3980,18 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
(t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
// Alias for compares without the ".w" optional width specifier.
def : t2InstAlias<"cmn${p} $Rn, $Rm",
@@ -3900,7 +4030,20 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr",
def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
-// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+
+// Alias for MVN with(out) the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
(t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
@@ -3921,6 +4064,30 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// STMDB/STMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
+ (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stmdb${p}.w $Rn!, $regs",
+ (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"ldmdb${p}.w $Rn, $regs",
+ (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs",
+ (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
@@ -4016,3 +4183,87 @@ def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+
+// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
+
+// LDR(literal) w/ alternate [pc, #imm] syntax.
+def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+// Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $addr",
+ (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
+ (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
+ (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p}.w $Rt, $addr",
+ (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
+ (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"add${p} $Rd, pc, $imm",
+ (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e746cf20d032..3600b889d6f1 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+// The VCVT to/from fixed-point instructions encode the 'fbits' operand
+// (the number of fixed bits) differently than it appears in the assembly
+// source. It's encoded as "Size - fbits" where Size is the size of the
+// fixed-point representation (32 or 16) and fbits is the value appearing
+// in the assembly source, an integer in [0,16] or (0,32], depending on size.
+def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
+def fbits32 : Operand<i32> {
+ let PrintMethod = "printFBits32";
+ let ParserMatchClass = fbits32_asm_operand;
+}
+
+def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
+def fbits16 : Operand<i32> {
+ let PrintMethod = "printFBits16";
+ let ParserMatchClass = fbits16_asm_operand;
+}
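
A small worked example of the "Size - fbits" rule described above (hypothetical
helpers that follow the comment rather than any particular LLVM API): for a 32-bit
fixed-point conversion written with fbits = 12, the instruction field holds
32 - 12 = 20, and the printer applies the inverse mapping.

#include <cassert>

// Size is 16 or 32, depending on the fixed-point type of the conversion.
static unsigned encodeFBits(unsigned Size, unsigned FBits) {
  assert(FBits <= Size && "fbits out of range");
  return Size - FBits;
}
static unsigned decodeFBits(unsigned Size, unsigned Field) {
  assert(Field <= Size && "encoded fbits field out of range");
  return Size - Field;
}
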
//===----------------------------------------------------------------------===//
// Load / store Instructions.
@@ -69,11 +85,11 @@ def vfp_f64imm : Operand<f64>,
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
- IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ IIC_fpLoad64, "vldr", "\t$Dd, $addr",
[(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
- IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ IIC_fpLoad32, "vldr", "\t$Sd, $addr",
[(set SPR:$Sd, (load addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -83,11 +99,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
- IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ IIC_fpStore64, "vstr", "\t$Dd, $addr",
[(store (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
- IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ IIC_fpStore32, "vstr", "\t$Sd, $addr",
[(store SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -190,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -270,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -299,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
@@ -319,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -360,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -790,127 +814,131 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
// S32 (U=0, sx=1) -> SL
// U32 (U=1, sx=1) -> UL
-// FIXME: Marking these as codegen only seems wrong. They are real
-// instructions(?)
-let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
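
The two encoder classes above differ only in how the 5-bit register number is split
between bit 22 (D) and bits 15-12 (Vd), per the "UInt(D:Vd) else UInt(Vd:D)" note.
A hedged sketch of that split:

#include <cstdint>

struct VdDFields { uint32_t D, Vd; };

// Single-precision Sn: the register number is Vd:D, so D is the low bit.
static VdDFields splitSPR(uint32_t RegNo) { return { RegNo & 1, RegNo >> 1 }; }
// Double-precision Dn: the register number is D:Vd, so D is the high bit.
static VdDFields splitDPR(uint32_t RegNo) { return { RegNo >> 4, RegNo & 0xF }; }
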
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
// Fixed-Point to FP:
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
-} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+} // End of 'let Constraints = "$a = $dst" in'
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
@@ -922,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -930,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -938,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -949,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -957,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -965,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -976,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -984,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -992,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1003,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1018,11 +1046,172 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
+ (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
+ (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
+def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma x, y, z)
+def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma (fneg x), y, z)) -> (vfnms x, y, z)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vfnms x, y, z)
+def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -1063,9 +1252,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
// APSR is the application level alias of CPSR. This instruction copies the
// FPSCR N, Z, C, V flags to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1079,6 +1268,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, fpexc", []>;
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
}
//===----------------------------------------------------------------------===//
@@ -1160,6 +1353,115 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified (pre-UAL) syntax. We don't guarantee
+// to support them all, but supporting at least some of the basics keeps
+// things friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+// VLDR/VSTR accept an optional type suffix.
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMUL has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
+ (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
+ (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VADD has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
+ (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
+ (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VSUB has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
+ (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
+ (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+
+// VMOV can accept an optional data type suffix of 32 bits or less.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
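For readers skimming the VFMA/VFMS/VFNMA/VFNMS additions above, here is a small
stand-alone C++ sketch (an editorial reading aid, not part of the imported
patch; the *_ref names are invented) of the arithmetic each instruction's
[(set ...)] pattern encodes, with d the tied accumulator input ($Ddin/$Sdin)
and n, m the multiplicands:

    #include <cmath>
    // Reference semantics of the new fused ops, using std::fma (one rounding):
    double vfma_ref (double d, double n, double m) { return std::fma( n, m,  d); } // d + n*m
    double vfms_ref (double d, double n, double m) { return std::fma(-n, m,  d); } // d - n*m
    double vfnms_ref(double d, double n, double m) { return std::fma( n, m, -d); } // n*m - d
    double vfnma_ref(double d, double n, double m) { return std::fma(-n, m, -d); } // -(n*m) - d

std::fma performs a single rounding, matching the fused behaviour that the
UseFusedMAC predicate gates for the fadd/fmul patterns above.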
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 45b7e48d0cfb..98930ccdb194 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
@@ -61,7 +61,7 @@ extern "C" {
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
@@ -85,7 +85,7 @@ extern "C" {
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 2f9792813d32..792818442724 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -1,4 +1,4 @@
-//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index faa8ba76845e..9ef2ace29cff 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,8 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -32,6 +32,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -62,6 +64,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
RegScavenger *RS;
bool isThumb2;
@@ -90,7 +93,9 @@ namespace {
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs);
void MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &MemOps,
unsigned memOpsBegin,
@@ -141,7 +146,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::LDMDB;
case ARM_AM::ib: return ARM::LDMIB;
}
- break;
case ARM::STRi12:
++NumSTMGened;
switch (Mode) {
@@ -151,7 +155,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
- break;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@@ -160,7 +163,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2LDMIA;
case ARM_AM::db: return ARM::t2LDMDB;
}
- break;
case ARM::t2STRi8:
case ARM::t2STRi12:
++NumSTMGened;
@@ -169,7 +171,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2STMIA;
case ARM_AM::db: return ARM::t2STMDB;
}
- break;
case ARM::VLDRS:
++NumVLDMGened;
switch (Mode) {
@@ -177,7 +178,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMSIA;
case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
}
- break;
case ARM::VSTRS:
++NumVSTMGened;
switch (Mode) {
@@ -185,7 +185,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMSIA;
case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
}
- break;
case ARM::VLDRD:
++NumVLDMGened;
switch (Mode) {
@@ -193,7 +192,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMDIA;
case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
}
- break;
case ARM::VSTRD:
++NumVSTMGened;
switch (Mode) {
@@ -201,10 +199,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMDIA;
case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
}
- break;
}
-
- return 0;
}
namespace llvm {
@@ -259,8 +254,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::STMIB_UPD:
return ARM_AM::ib;
}
-
- return ARM_AM::bad_am_submode;
}
} // end namespace ARM_AM
@@ -291,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int Offset, unsigned Base, bool BaseKill,
int Opcode, ARMCC::CondCodes Pred,
unsigned PredReg, unsigned Scratch, DebugLoc dl,
- SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
@@ -359,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
+ // Add implicit defs for super-registers.
+ for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
+ MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
+
return true;
}
@@ -393,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
}
SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 8> ImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
// uses the same register, make sure to transfer any kill flag.
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
+
+ // Collect any implicit defs of super-registers. They must be preserved.
+ for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+ continue;
+ unsigned DefReg = MO->getReg();
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
+ ImpDefs.push_back(DefReg);
+ }
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs))
+ Pred, PredReg, Scratch, dl, Regs, ImpDefs))
return;
// Merge succeeded, update records.
@@ -506,50 +514,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
return;
}
-static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool definesCPSR(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2SUBri &&
- MI->getOpcode() != ARM::tSUBspi &&
- MI->getOpcode() != ARM::SUBri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2SUBri:
+ case ARM::SUBri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tSUBspi:
+ break;
+ }
// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
-static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2ADDri &&
- MI->getOpcode() != ARM::tADDspi &&
- MI->getOpcode() != ARM::ADDri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2ADDri:
+ case ARM::ADDri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tADDspi:
+ break;
+ }
if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
@@ -603,7 +645,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::LDMDA_UPD;
case ARM_AM::db: return ARM::LDMDB_UPD;
}
- break;
case ARM::STMIA:
case ARM::STMDA:
case ARM::STMDB:
@@ -615,7 +656,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::STMDA_UPD;
case ARM_AM::db: return ARM::STMDB_UPD;
}
- break;
case ARM::t2LDMIA:
case ARM::t2LDMDB:
switch (Mode) {
@@ -623,7 +663,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2LDMIA_UPD;
case ARM_AM::db: return ARM::t2LDMDB_UPD;
}
- break;
case ARM::t2STMIA:
case ARM::t2STMDB:
switch (Mode) {
@@ -631,38 +670,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2STMIA_UPD;
case ARM_AM::db: return ARM::t2STMDB_UPD;
}
- break;
case ARM::VLDMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMSIA_UPD;
case ARM_AM::db: return ARM::VLDMSDB_UPD;
}
- break;
case ARM::VLDMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMDIA_UPD;
case ARM_AM::db: return ARM::VLDMDDB_UPD;
}
- break;
case ARM::VSTMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMSIA_UPD;
case ARM_AM::db: return ARM::VSTMSDB_UPD;
}
- break;
case ARM::VSTMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMDIA_UPD;
case ARM_AM::db: return ARM::VSTMDDB_UPD;
}
- break;
}
-
- return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
@@ -686,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
bool BaseKill = MI->getOperand(0).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
@@ -783,7 +815,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_PRE;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
@@ -809,7 +840,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_POST;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
@@ -841,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
@@ -1071,11 +1101,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned Opcode = MI->getOpcode();
if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg();
unsigned OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
- if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
+ if (!Errata602117 &&
+ ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
return false;
MachineBasicBlock::iterator NewBBI = MBBI;
@@ -1087,15 +1123,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
bool OddDeadKill = isLd ?
MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
bool OddUndef = MI->getOperand(1).isUndef();
- const MachineOperand &BaseOp = MI->getOperand(2);
- unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill();
bool BaseUndef = BaseOp.isUndef();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
@@ -1126,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned NewOpc = (isLd)
? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and base register is killed, it may have been
// re-defed by the load, make sure the first load does not clobber it.
@@ -1133,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
(BaseKill || OffKill) &&
(TRI->regsOverlap(EvenReg, BaseReg))) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
+ if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
+ NewOpc = ARM::t2LDRi12;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1150,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenDeadKill = false;
OddDeadKill = true;
}
+ // Never kill the base register in the first instruction.
+ // <rdar://problem/11101911>
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
@@ -1206,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
@@ -1380,6 +1425,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
@@ -1464,19 +1510,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1498,6 +1543,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
+
+/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ + (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd =
+ std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
+
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
@@ -1565,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = llvm::getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
@@ -1615,8 +1677,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOp = Op;
}
- unsigned Opcode = Op->getOpcode();
- if (LastOpcode && Opcode != LastOpcode)
+ unsigned LSMOpcode
+ = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
+ if (LastOpcode && LSMOpcode != LastOpcode)
break;
int Offset = getMemoryOpOffset(Op);
@@ -1627,7 +1690,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
LastOffset = Offset;
LastBytes = Bytes;
- LastOpcode = Opcode;
+ LastOpcode = LSMOpcode;
if (++NumMove == 8) // FIXME: Tune this limit.
break;
}
@@ -1692,6 +1755,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
@@ -1704,6 +1769,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
MBB->erase(Op0);
@@ -1745,8 +1812,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
@@ -1758,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
- if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
int Opc = MI->getOpcode();
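A detail of the ARMLoadStoreOptimizer changes above that is easy to misread is
the new NewOpc2 selection used when an LDRD/STRD has to be split into two
single accesses: as the added comment notes, the Thumb2 i8 forms cannot encode
a zero offset, so the second access at OffImm+4 may need the i12 form even when
the first one uses i8. A minimal stand-alone sketch (editorial illustration,
not LLVM code; pickThumb2LoadOpc is an invented name) of that decision:

    #include <cassert>
    #include <string>

    // Mirrors the diff's "OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12" choice.
    static std::string pickThumb2LoadOpc(int OffImm) {
      return OffImm < 0 ? "t2LDRi8" : "t2LDRi12";
    }

    int main() {
      int OffImm = -4;                                      // offset of the low word
      assert(pickThumb2LoadOpc(OffImm)     == "t2LDRi8");   // ldr rE, [rB, #-4]
      assert(pickThumb2LoadOpc(OffImm + 4) == "t2LDRi12");  // ldr rO, [rB]  (offset 0)
      return 0;
    }

The same function also gains the ARM errata 602117 workaround: on Cortex-M3, an
LDRD whose first destination register equals the base register (for example
ldrd r0, r1, [r0]) is now split into two single loads instead of being kept as
a register-pair load.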
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index daa126def401..e2ac9a466ed8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
switch (MO.getTargetFlags()) {
- default:
- assert(0 && "Unknown target flag on symbol operand");
+ default: llvm_unreachable("Unknown target flag on symbol operand");
case 0:
break;
case ARMII::MO_LO16:
@@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) {
switch (MO.getType()) {
- default:
- assert(0 && "unknown operand type");
- return false;
+ default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all non-CPSR implicit register operands.
if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
@@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
break;
}
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return false;
}
return true;
}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..af445e2f35aa
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void ARMFunctionInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 138f0c262271..f1c8fc84816e 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private ARM-specific information for each MachineFunction.
class ARMFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
/// isThumb - True if this function is compiled under Thumb mode.
/// Used to initialized Align, so must precede it.
@@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPR callee-saved (2) : r8, r10, r11
/// --------------------------------------------
/// DPR callee-saved : d8 - d15
+ ///
+ /// Also see NumAlignedDPRCS2Regs below. Not all D-regs need to go in area 3.
+ /// Some may be spilled after the stack has been realigned.
unsigned GPRCS1Offset;
unsigned GPRCS2Offset;
unsigned DPRCSOffset;
@@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
BitVector GPRCS2Frames;
BitVector DPRCSFrames;
+ /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
+ /// the aligned portion of the stack frame. This is always a contiguous
+ /// sequence of D-registers starting from d8.
+ ///
+ /// We do not keep track of the frame indices used for these registers - they
+ /// behave like any other frame index in the aligned stack frame. These
+ /// registers also aren't included in DPRCSSize above.
+ unsigned NumAlignedDPRCS2Regs;
+
/// JumpTableUId - Unique id for jumptables.
///
unsigned JumpTableUId;
@@ -104,6 +117,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false) {}
@@ -137,6 +151,9 @@ public:
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+ unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; }
+ void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; }
+
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 18e162000602..efa22fbed9f7 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1cba1ba591ef..6f3819afd0a5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
using namespace llvm;
+void ARMRegisterInfo::anchor() { }
+
ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8edfb9a2057f..8a248425c33c 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,15 @@
#define ARMREGISTERINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+ virtual void anchor();
public:
ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
};
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 036822d18ad2..1466e983f3be 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
field bits<4> Num;
let Namespace = "ARM";
let SubRegs = subregs;
+ // All bits of ARM registers with sub-registers are covered by sub-registers.
+ let CoveredBySubRegs = 1;
}
class ARMFReg<bits<6> num, string n> : Register<n> {
@@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
// Subregister indices.
let Namespace = "ARM" in {
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+
// Note: Code depends on these having consecutive numbers.
-def ssub_0 : SubRegIndex;
-def ssub_1 : SubRegIndex;
-def ssub_2 : SubRegIndex; // In a Q reg.
-def ssub_3 : SubRegIndex;
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
+def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
def dsub_0 : SubRegIndex;
def dsub_1 : SubRegIndex;
-def dsub_2 : SubRegIndex;
-def dsub_3 : SubRegIndex;
-def dsub_4 : SubRegIndex;
-def dsub_5 : SubRegIndex;
-def dsub_6 : SubRegIndex;
-def dsub_7 : SubRegIndex;
+def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
+def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
+def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
+def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
+def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
+def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
-def qsub_0 : SubRegIndex;
-def qsub_1 : SubRegIndex;
-def qsub_2 : SubRegIndex;
-def qsub_3 : SubRegIndex;
-
-def qqsub_0 : SubRegIndex;
-def qqsub_1 : SubRegIndex;
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
+def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+// Let TableGen synthesize the remaining 12 ssub_* indices.
+// We don't need to name them.
}
// Integer registers
@@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
// Advanced SIMD (NEON) defines 16 quad-word aliases
-let SubRegIndices = [dsub_0, dsub_1],
- CompositeIndices = [(ssub_2 dsub_1, ssub_0),
- (ssub_3 dsub_1, ssub_1)] in {
+let SubRegIndices = [dsub_0, dsub_1] in {
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
@@ -150,45 +152,22 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
}
-// Pseudo 256-bit registers to represent pairs of Q registers. These should
-// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g., vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technically the
-// starting D register number doesn't have to be multiple of 4, e.g.,
-// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
-// stuff very messy.
-let SubRegIndices = [qsub_0, qsub_1],
- CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
-def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
-def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
-def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
-def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
-def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
-def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
-def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
-def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
-}
-
-// Pseudo 512-bit registers to represent four consecutive Q registers.
-let SubRegIndices = [qqsub_0, qqsub_1],
- CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
- (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
- (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
-def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
-def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
-def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
-def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
-}
-
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
// Register classes.
@@ -261,6 +240,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
}];
}
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+
// Scalar single precision floating point register class..
def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
@@ -316,37 +301,100 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
(DPR_8 dsub_0, dsub_1)];
}
+// Pseudo-registers representing odd-even pairs of D registers. The even-odd
+// pairs are already represented by the Q registers.
+// These are needed by NEON instructions requiring two consecutive D registers.
+// There is no D31_D0 register as that is always an UNPREDICTABLE encoding.
+def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2)]>;
+
+// Register class representing a pair of consecutive D registers.
+// Use the Q registers for the even-odd pairs.
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
+ // Allocate starting at non-VFP2 registers D16-D31 first.
+ // Prefer even-odd pairs as they are easier to copy.
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Pseudo-registers representing 3 consecutive D registers.
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 1),
+ (shl DPR, 2)]>;
+
+// 3 consecutive D registers.
+def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
+
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
-def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQPR, 4)];
+ let AltOrders = [(rotl QQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Subset of QQPR that have 32-bit SPR subregs.
-def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
- (QPR_VFP2 qsub_0, qsub_1)];
+// Tuples of 4 D regs that aren't also a pair of Q regs.
+def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2),
+ (decimate (shl DPR, 3), 2),
+ (decimate (shl DPR, 4), 2)]>;
-}
+// 4 consecutive D registers.
+def DQuad : RegisterClass<"ARM", [v4i64], 256,
+ (interleave Tuples2Q, TuplesOE4D)>;
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
+ [(shl QQPR, 0), (shl QQPR, 2)]>;
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
-def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrders = [(rotl QQQQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Condition code registers.
-def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
- let CopyCost = -1; // Don't allow copying of status registers.
- let isAllocatable = 0;
+
+// Pseudo-registers representing 2-spaced consecutive D registers.
+def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 2)]>;
+
+// Spaced pairs of D registers.
+def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>;
+
+def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4)]>;
+
+// Spaced triples of D registers.
+def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
}
+
+def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4),
+ (shl DPR, 6)]>;
+
+// Spaced quads of D registers.
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
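As a reading aid for the RegisterTuples set arithmetic introduced above (an
editorial sketch, not part of the patch): TuplesOE2D zips the odd-numbered D
registers with the even-numbered ones that follow them, yielding the fifteen
odd-even pairs D1_D2 through D29_D30 that the Q registers do not already
cover; as the comment above notes, there is no D31_D0 pair. A quick C++
enumeration of the same set:

    #include <cstdio>

    int main() {
      // (decimate (shl DPR, 1), 2) = D1, D3, ...  zipped with
      // (decimate (shl DPR, 2), 2) = D2, D4, ...  -> odd-even pairs only.
      for (int d = 1; d + 1 <= 30; d += 2)
        std::printf("D%d_D%d ", d, d + 1);  // D1_D2 D3_D4 ... D29_D30
      std::printf("\n");
      return 0;
    }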
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 86e7206f2cc6..21877fd9af37 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -1,4 +1,4 @@
-//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 958c5c647013..45486fd0b6dd 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -1,10 +1,10 @@
-//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
-//
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
@@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
def IIC_VMACD : InstrItinClass;
def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
def IIC_VRECSD : InstrItinClass;
def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8d86c01dc741..8b1fb9386ad5 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<20, [A8_NPipe], 0>,
@@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
//
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of a
+ // multi-cycle instruction, so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
// Double-register Reciprical Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 49fedf63f8bc..0d710cc1acee 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_NPipe]>],
[9, 1, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
@@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<4, [A9_NPipe]>],
[8, 4, 2, 1]>,
//
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of a
+ // multi-cycle instruction, so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
// Double-register Reciprical Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index c1880a72fff3..0ace9bc1796d 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,10 +1,10 @@
-//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM v6 processors.
@@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries<
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a3a3d58841db..e2530d07e237 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -67,7 +67,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, 0);
+ false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -105,7 +105,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff),
+ false, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -144,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- // Use default for non AAPCS subtargets
- if (!Subtarget->isAAPCS_ABI())
+ // Use default for non-AAPCS (or Darwin) subtargets
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin())
return SDValue();
const ARMTargetLowering &TLI =
@@ -188,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
0, // number of fixed arguments
TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
false, // is tail call
+ false, // does not return
false, // is return val used
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()), // callee
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 247d6be59ad4..e247b76ad43b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,6 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -47,13 +46,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasV7Ops(false)
, HasVFPv2(false)
, HasVFPv3(false)
+ , HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
, InThumbMode(false)
- , InNaClMode(false)
, HasThumb2(false)
, IsMClass(false)
, NoARM(false)
@@ -104,18 +103,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
computeIssueWidth();
if (TT.find("eabi") != std::string::npos)
+ // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
+ // Darwin-EABI, conform to AAPCS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetDarwin())
+ if (!isTargetIOS())
UseMovt = hasV6T2Ops();
else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
- const Triple &T = getTargetTriple();
- SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
+ SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
if (!isThumb() || hasThumb2())
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index b63e1085fb83..e72b06fa3fcc 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -1,4 +1,4 @@
-//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -45,10 +45,11 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
- /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
- /// supported.
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
+ /// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
+ bool HasVFPv4;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -70,9 +71,6 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
- /// InNaClMode - True if targeting Native Client
- bool InNaClMode;
-
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
@@ -126,6 +124,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+ /// HasRAS - Some processors perform return stack prediction. CodeGen should
+ /// avoid issuing "normal" call instructions to callees which do not return.
+ bool HasRAS;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension;
@@ -194,11 +196,13 @@ protected:
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool hasARMOps() const { return !NoARM; }
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
bool hasNEON() const { return HasNEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -212,6 +216,7 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
@@ -220,6 +225,7 @@ protected:
const Triple &getTargetTriple() const { return TargetTriple; }
+ bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 96b1e89b0df0..047efc23a4ea 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static cl::opt<bool>
@@ -33,24 +34,32 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
}
+
/// TargetMachine ctor - Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMTargetMachine::anchor() { }
+
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -68,10 +77,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"support ARM mode execution!");
}
+void ThumbTargetMachine::anchor() { }
+
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -94,37 +107,62 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
- PM.add(createARMGlobalMergePass(getTargetLowering()));
+namespace {
+/// ARM Code Generator Pass Configuration Options.
+class ARMPassConfig : public TargetPassConfig {
+public:
+ ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ ARMBaseTargetMachine &getARMTargetMachine() const {
+ return getTM<ARMBaseTargetMachine>();
+ }
+
+ const ARMSubtarget &getARMSubtarget() const {
+ return *getARMTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new ARMPassConfig(this, PM);
+}
+
+bool ARMPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
+ PM.add(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
-bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createARMISelDag(*this, OptLevel));
+bool ARMPassConfig::addInstSelector() {
+ PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
-bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
- if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
PM.add(createMLxExpansionPass());
return true;
}
-bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only()) {
PM.add(createARMLoadStoreOptimizationPass());
- if (Subtarget.hasNEON())
+ printAndVerify("After ARM load / store optimizer");
+ }
+ if (getARMSubtarget().hasNEON())
PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
@@ -132,27 +170,31 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
- PM.add(createIfConverterPass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only())
+ addPass(IfConverterID);
}
- if (Subtarget.isThumb2())
+ if (getARMSubtarget().isThumb2())
PM.add(createThumb2ITBlockPass());
return true;
}
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+bool ARMPassConfig::addPreEmitPass() {
+ if (getARMSubtarget().isThumb2()) {
+ if (!getARMSubtarget().prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ // Constant island pass works on unbundled instructions.
+ addPass(UnpackMachineBundlesID);
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
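The pass-setup changes above replace the old per-hook overrides, which each took a PassManagerBase and a CodeGenOpt::Level, with a single TargetPassConfig object returned from createPassConfig(). A minimal self-contained mimic of that shape (plain C++ with stub types; everything except the addPreISel hook name is a placeholder, not LLVM API):

  #include <cstdio>

  struct TargetMachineStub { int OptLevel; };        // stand-in for the target machine
  struct PassManagerStub {                           // stand-in for PassManagerBase
    void add(const char *Pass) { std::printf("add %s\n", Pass); }
  };

  // Hooks now live on a config object that carries the TM and PM itself,
  // so each override queries the opt level instead of receiving it.
  struct PassConfigStub {
    TargetMachineStub *TM;
    PassManagerStub &PM;
    PassConfigStub(TargetMachineStub *tm, PassManagerStub &pm) : TM(tm), PM(pm) {}
    virtual ~PassConfigStub() {}
    virtual bool addPreISel() { return false; }
    int getOptLevel() const { return TM->OptLevel; }
  };

  struct ARMLikePassConfig : public PassConfigStub {
    ARMLikePassConfig(TargetMachineStub *tm, PassManagerStub &pm)
      : PassConfigStub(tm, pm) {}
    virtual bool addPreISel() {
      if (getOptLevel() != 0)                        // mirrors the CodeGenOpt::None check
        PM.add("GlobalMergePass");
      return false;
    }
  };

  int main() {
    TargetMachineStub TM = { 2 };
    PassManagerStub PM;
    ARMLikePassConfig Cfg(&TM, PM);
    Cfg.addPreISel();                                // prints: add GlobalMergePass
    return 0;
  }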
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c8c601c30171..abcdb24c0c69 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,9 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -50,18 +52,15 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- JITCodeEmitter &MCE);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
};
/// ARMTargetMachine - ARM target machine.
///
class ARMTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMELFWriterInfo ELFWriterInfo;
@@ -71,7 +70,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -100,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
/// Thumb-1 and Thumb-2.
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
@@ -111,7 +113,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1b5196..a5ea1c202e2c 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -24,8 +25,9 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
- if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ if (isAAPCS_ABI) {
StaticCtorSection =
getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
ELF::SHF_WRITE |
@@ -45,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ // Emit ctors in priority order.
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ // Emit dtors in priority order.
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
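As an illustration of the priority-suffixed constructor sections introduced above, here is a minimal standalone sketch (not part of the patch) of the name computed by getStaticCtorSection on the AAPCS path, where StaticCtorSection was set to .init_array in Initialize(); std::to_string stands in for LLVM's utostr:

  #include <cstdio>
  #include <string>

  // The default priority (65535) keeps the plain .init_array section; any
  // other priority gets its own numbered section so ctors are emitted in
  // priority order.
  static std::string ctorSectionName(unsigned Priority) {
    if (Priority == 65535)
      return ".init_array";
    return std::string(".init_array.") + std::to_string(Priority);
  }

  int main() {
    std::printf("%s\n", ctorSectionName(65535).c_str()); // .init_array
    std::printf("%s\n", ctorSectionName(101).c_str());   // .init_array.101
    return 0;
  }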
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439d7..ff210604148d 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,6 +20,7 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
+ bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -31,6 +32,9 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
+
+ const MCSection * getStaticCtorSection(unsigned Priority) const;
+ const MCSection * getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index 14d35ba54654..fda8536fcf6b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -17,9 +17,6 @@
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include <string>
@@ -107,11 +104,9 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() {
SetError(Lexer->getErrLoc(), Lexer->getErr());
break;
case AsmToken::Identifier: {
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
+ std::string lowerCase = lexedToken.getString().lower();
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lowerCase);
// Check for register aliases.
// r13 -> sp
// r14 -> lr
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 24f15b4694ff..e55a7dad45db 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -40,9 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ // Map of register aliases created via the .req directive.
+ StringMap<unsigned> RegisterReqs;
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
@@ -91,9 +96,14 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveEabiAttr(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +171,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -197,10 +209,18 @@ class ARMAsmParser : public MCTargetAsmParser {
const SmallVectorImpl<MCParsedAsmOperand*> &);
bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- void processInstruction(MCInst &Inst,
+ bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool shouldOmitCCOutOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -217,6 +237,9 @@ public:
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ // Cache the MCRegisterInfo.
+ MRI = &getContext().getRegisterInfo();
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -251,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand {
k_CoprocReg,
k_CoprocOption,
k_Immediate,
- k_FPImmediate,
k_MemBarrierOpt,
k_Memory,
k_PostIndexRegister,
@@ -262,6 +284,9 @@ class ARMOperand : public MCParsedAsmOperand {
k_RegisterList,
k_DPRRegisterList,
k_SPRRegisterList,
+ k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -311,6 +336,14 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned RegNum;
} Reg;
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ } VectorList;
+
struct {
unsigned Val;
} VectorIndex;
@@ -319,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand {
const MCExpr *Val;
} Imm;
- struct {
- unsigned Val; // encoded 8-bit representation
- } FPImm;
-
/// Combined record for all forms of ARM address expressions.
struct {
unsigned BaseRegNum;
@@ -333,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand {
ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
unsigned ShiftImm; // shift for OffsetReg.
unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (8, 16, or 32)
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
} Memory;
@@ -393,6 +422,11 @@ public:
case k_SPRRegisterList:
Registers = o.Registers;
break;
+ case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
+ VectorList = o.VectorList;
+ break;
case k_CoprocNum:
case k_CoprocReg:
Cop = o.Cop;
@@ -403,9 +437,6 @@ public:
case k_Immediate:
Imm = o.Imm;
break;
- case k_FPImmediate:
- FPImm = o.FPImm;
- break;
case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
@@ -474,15 +505,10 @@ public:
}
const MCExpr *getImm() const {
- assert(Kind == k_Immediate && "Invalid access!");
+ assert(isImm() && "Invalid access!");
return Imm.Val;
}
- unsigned getFPImm() const {
- assert(Kind == k_FPImmediate && "Invalid access!");
- return FPImm.Val;
- }
-
unsigned getVectorIndex() const {
assert(Kind == k_VectorIndex && "Invalid access!");
return VectorIndex.Val;
@@ -511,90 +537,219 @@ public:
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
bool isImm() const { return Kind == k_Immediate; }
- bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isFPImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ return Val != -1;
+ }
+ bool isFBits16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 16;
+ }
+ bool isFBits32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
bool isImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
bool isImm0_1020s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
}
bool isImm0_508s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
}
+ bool isImm0_508s4Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ // Explicitly exclude zero. We want that to use the normal 0_508 version.
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508;
+ }
bool isImm0_255() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_4095() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4096;
+ }
+ bool isImm0_4095Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ return Value > 0 && Value < 4096;
+ }
+ bool isImm0_1() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 8;
}
bool isImm0_15() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 16;
}
bool isImm0_31() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
bool isImm1_16() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 17;
}
bool isImm1_32() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
+ bool isImm0_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 33;
+ }
bool isImm0_65535() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
bool isImm0_65535Expr() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// If it's not a constant expression, it'll generate a fixup and be
// handled later.
@@ -603,56 +758,81 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value <= 0xffffff;
}
bool isImmThumbSR() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
bool isPKHLSLImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
bool isPKHASRImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
bool isARMSOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(Value) != -1;
}
+ bool isARMSOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isARMSOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isT2SOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(Value) != -1;
}
+ bool isT2SOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(~Value) != -1;
+ }
+ bool isT2SOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
@@ -672,7 +852,7 @@ public:
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
- return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false) const {
if (!isMemory())
@@ -681,6 +861,17 @@ public:
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
(alignOK || Memory.Alignment == 0);
}
+ bool isMemPCRelImm12() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base register must be PC.
+ if (Memory.BaseRegNum != ARM::PC)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
bool isAlignedMemory() const {
return isMemNoOffset(true);
}
@@ -694,8 +885,7 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAM2OffsetImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
// Immediate offset in range [-4095, 4095].
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -703,6 +893,11 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAddrMode3() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
@@ -726,6 +921,11 @@ public:
return (Val > -256 && Val < 256) || Val == INT32_MIN;
}
bool isAddrMode5() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
@@ -733,7 +933,7 @@ public:
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
- Val == INT32_MIN;
+ Val == INT32_MIN;
}
bool isMemTBB() const {
if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
@@ -810,6 +1010,11 @@ public:
return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
}
bool isMemImm8s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
@@ -828,6 +1033,8 @@ public:
bool isMemImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, 255].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
@@ -844,18 +1051,14 @@ public:
bool isMemNegImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
- // If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup. If it is a constant, it's something else
- // and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
- return true;
-
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
@@ -867,7 +1070,7 @@ public:
// If we have an immediate that's not a constant, treat it as a label
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -878,16 +1081,14 @@ public:
return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
}
bool isPostIdxImm8() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
return (Val > -256 && Val < 256) || (Val == INT32_MIN);
}
bool isPostIdxImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
@@ -898,6 +1099,188 @@ public:
bool isMSRMask() const { return Kind == k_MSRMask; }
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+ // NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPair() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListDPairSpaced() const {
+ if (isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPairAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListDPairSpacedAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListThreeDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListThreeQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListFourQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListThreeDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListFourDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
bool isVectorIndex8() const {
if (Kind != k_VectorIndex) return false;
return VectorIndex.Val < 8;
@@ -911,7 +1294,82 @@ public:
return VectorIndex.Val < 2;
}
+ bool isNEONi8splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i8 value splatted across 8 bytes. The immediate is just the 8-bit
+ // value.
+ return Value >= 0 && Value < 256;
+ }
+
+ bool isNEONi16splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i16 value in the range [0,255] or [0x0100, 0xff00]
+ return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ }
+
+ bool isNEONi32splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000);
+ }
+ bool isNEONi32vmov() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+ bool isNEONi32vmovNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = ~CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+
+ bool isNEONi64splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ // i64 value with each byte being either 0 or 0xff.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
+ return true;
+ }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
@@ -967,7 +1425,8 @@ public:
void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ assert(isRegShiftedReg() &&
+ "addRegShiftedRegOperands() on non RegShiftedReg!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
Inst.addOperand(MCOperand::CreateImm(
@@ -976,7 +1435,8 @@ public:
void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ assert(isRegShiftedImm() &&
+ "addRegShiftedImmOperands() on non RegShiftedImm!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
Inst.addOperand(MCOperand::CreateImm(
ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
@@ -1026,9 +1486,23 @@ public:
addExpr(Inst, getImm());
}
+ void addFBits16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue()));
+ }
+
+ void addFBits32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue()));
+ }
+
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
void addImm8s4Operands(MCInst &Inst, unsigned N) const {
@@ -1047,32 +1521,20 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
- void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4)));
}
- void addImm0_255Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_15Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
@@ -1091,21 +1553,6 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
}
- void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm24bitOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate, except for 32, which encodes as
@@ -1115,11 +1562,6 @@ public:
Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
}
- void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// An ASR value of 32 encodes as 0, so that's how we want to add it to
@@ -1129,19 +1571,44 @@ public:
Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
}
- void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
- void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
- void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually an imm0_4095, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ }
+
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
@@ -1154,6 +1621,14 @@ public:
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
}
+ void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int32_t Imm = Memory.OffsetImm->getValue();
+ // FIXME: Handle #-0
+ if (Imm == INT32_MIN) Imm = 0;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -1196,6 +1671,16 @@ public:
void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
if (!Memory.OffsetRegNum) {
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1237,6 +1722,15 @@ public:
void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
// The lower two bits are always zero and as such are not encoded.
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1250,6 +1744,15 @@ public:
void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1281,7 +1784,7 @@ public:
void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1296,7 +1799,7 @@ public:
void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1322,8 +1825,9 @@ public:
void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
- Memory.ShiftImm, Memory.ShiftType);
+ unsigned Val =
+ ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1420,6 +1924,17 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
+
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
+ }
+
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
@@ -1435,6 +1950,80 @@ public:
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
+ void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ // Mask in that this is an i8 splat.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00));
+ }
+
+ void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = ~CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
+ Imm |= (Value & 1) << i;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00));
+ }
+
virtual void print(raw_ostream &OS) const;
static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
@@ -1579,6 +2168,43 @@ public:
return Op;
}
+ static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorList);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
MCContext &Ctx) {
ARMOperand *Op = new ARMOperand(k_VectorIndex);
@@ -1596,14 +2222,6 @@ public:
return Op;
}
- static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARMOperand *Op = new ARMOperand(k_FPImmediate);
- Op->FPImm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
static ARMOperand *CreateMem(unsigned BaseRegNum,
const MCConstantExpr *OffsetImm,
unsigned OffsetRegNum,
@@ -1668,10 +2286,6 @@ public:
void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case k_FPImmediate:
- OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
- << ") >";
- break;
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
@@ -1679,9 +2293,10 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ccout " << getReg() << ">";
break;
case k_ITCondMask: {
- static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
- "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
- "(tee)", "(eee)" };
+ static const char *MaskStr[] = {
+ "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
+ "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ };
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
break;
@@ -1735,18 +2350,15 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
case k_ShiftedRegister:
OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
- << ", " << RegShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
- << ">";
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
break;
case k_ShiftedImmediate:
OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
- << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
- << ">";
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -1770,6 +2382,18 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
+ case k_VectorList:
+ OS << "<vector_list " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -1788,7 +2412,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -1801,10 +2427,7 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1812,44 +2435,38 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase);
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
-#if 0
- // Also check for an index operand. This is only legal for vector registers,
- // but that'll get caught OK in operand matching, so we don't need to
- // explicitly filter everything else out here.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = Parser.getTok().getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
- SIdx, E,
- getContext()));
- }
-#endif
-
return RegNum;
}
@@ -1864,9 +2481,9 @@ int ARMAsmParser::tryParseShiftRegister(
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -1895,7 +2512,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -1919,6 +2537,10 @@ int ARMAsmParser::tryParseShiftRegister(
Error(ImmLoc, "immediate shift value out of range");
return -1;
}
+ // Shift by zero is a nop. Always send it through as lsl.
+ // ('as' compatibility)
+ if (Imm == 0)
+ ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
@@ -1976,20 +2598,15 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
Parser.Lex(); // Eat right bracket token.
@@ -2008,7 +2625,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
switch (Name.size()) {
- default: break;
+ default: return -1;
case 2:
if (Name[0] != CoprocOp)
return -1;
@@ -2025,7 +2642,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '8': return 8;
case '9': return 9;
}
- break;
case 3:
if (Name[0] != CoprocOp || Name[1] != '1')
return -1;
@@ -2038,10 +2654,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '4': return 14;
case '5': return 15;
}
- break;
}
-
- return -1;
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
@@ -2161,7 +2774,7 @@ static unsigned getNextRegister(unsigned Reg) {
if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
return Reg + 1;
switch(Reg) {
- default: assert(0 && "Invalid GPR number!");
+ default: llvm_unreachable("Invalid GPR number!");
case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
@@ -2173,6 +2786,29 @@ static unsigned getNextRegister(unsigned Reg) {
}
}
+// Return the low-subreg of a given Q register.
+static unsigned getDRegFromQReg(unsigned QReg) {
+ switch (QReg) {
+ default: llvm_unreachable("expected a Q register!");
+ case ARM::Q0: return ARM::D0;
+ case ARM::Q1: return ARM::D2;
+ case ARM::Q2: return ARM::D4;
+ case ARM::Q3: return ARM::D6;
+ case ARM::Q4: return ARM::D8;
+ case ARM::Q5: return ARM::D10;
+ case ARM::Q6: return ARM::D12;
+ case ARM::Q7: return ARM::D14;
+ case ARM::Q8: return ARM::D16;
+ case ARM::Q9: return ARM::D18;
+ case ARM::Q10: return ARM::D20;
+ case ARM::Q11: return ARM::D22;
+ case ARM::Q12: return ARM::D24;
+ case ARM::Q13: return ARM::D26;
+ case ARM::Q14: return ARM::D28;
+ case ARM::Q15: return ARM::D30;
+ }
+}
+
/// Parse a register list.
bool ARMAsmParser::
parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2188,7 +2824,17 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Reg == -1)
return Error(RegLoc, "register expected");
- MCRegisterClass *RC;
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ ++Reg;
+ }
+ const MCRegisterClass *RC;
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
@@ -2198,10 +2844,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
else
return Error(RegLoc, "invalid register in register list");
- // The reglist instructions have at most 16 registers, so reserve
- // space for that many.
- SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
- // Store the first register.
+ // Store the register.
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
// This starts immediately after the first register token in the list,
@@ -2210,11 +2853,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
return Error(EndLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
// If the register is the same as the start reg, there's nothing
// more to do.
if (Reg == EndReg)
@@ -2236,15 +2882,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ bool isQReg = false;
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ isQReg = true;
+ }
// The register must be in the same register class as the first.
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
- return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here, as the
// VFP register classes have the enum sorted properly.
@@ -2252,6 +2914,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ if (isQReg)
+ Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
SMLoc E = Parser.getTok().getLoc();
@@ -2259,10 +2923,319 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assembly puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
+ const MCExpr *LaneIndex;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LaneIndex)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
+ if (!CE) {
+ Error(Loc, "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "']' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the ']'.
+ int64_t Val = CE->getValue();
+
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7) {
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ return MatchOperand_ParseFail;
+ }
+ Index = Val;
+ LaneKind = IndexedLane;
+ return MatchOperand_Success;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
+// Parse a vector register list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without enclosing curly braces) as a single or double entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ SMLoc E = Parser.getTok().getLoc();
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Count = 1;
+ int Spacing = 0;
+ unsigned FirstReg = Reg;
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with a four-register single-spaced list.
+ ++Reg;
+ ++Count;
+ }
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the minus.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(EndLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(EndLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(EndLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Vector register lists must be contiguous.
+ // It's OK to use the enumeration values directly here, as the
+ // VFP register classes have the enum sorted properly.
+ //
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
+ Reg = getDRegFromQReg(Reg);
+ if (Reg != OldReg + 1) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Reg;
+ Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ continue;
+ }
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(E, "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '}' token.
+
+ switch (LaneKind) {
+ case NoLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ (Spacing == 2),
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex,
+ (Spacing == 2),
+ S, E));
+ break;
+ }
+ return MatchOperand_Success;
+}
+
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2337,7 +3310,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMClass()) {
// See ARMv6-M 10.1.1
- unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
.Case("apsr", 0)
.Case("iapsr", 1)
.Case("eapsr", 2)
@@ -2353,14 +3327,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
.Case("faultmask", 19)
.Case("control", 20)
.Default(~0U);
-
+
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
-
+
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
return MatchOperand_Success;
@@ -2369,7 +3343,7 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
size_t Start = 0, Next = Mask.find('_');
StringRef Flags = "";
- std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
+ std::string SpecReg = Mask.slice(Start, Next).lower();
if (Next != StringRef::npos)
Flags = Mask.slice(Next+1, Mask.size());
@@ -2392,7 +3366,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
FlagsVal = 8; // No flag
}
} else if (SpecReg == "cpsr" || SpecReg == "spsr") {
- if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ // cpsr_all is an alias for cpsr_fc, as is plain cpsr.
+ if (Flags == "all" || Flags == "")
Flags = "fc";
for (int i = 0, e = Flags.size(); i != e; ++i) {
unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
@@ -2411,9 +3386,13 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
} else // No match for special register.
return MatchOperand_NoMatch;
- // Special register without flags are equivalent to "fc" flags.
- if (!FlagsVal)
- FlagsVal = 0x9;
+ // Special register without flags is NOT equivalent to "fc" flags.
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following
+ // two lines would enable gas compatibility at the expense of breaking
+ // round-tripping.
+ //
+ // if (!FlagsVal)
+ // FlagsVal = 0x9;
// Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
if (SpecReg == "spsr")
@@ -2433,8 +3412,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
StringRef ShiftName = Tok.getString();
- std::string LowerOp = LowercaseString(Op);
- std::string UpperOp = UppercaseString(Op);
+ std::string LowerOp = Op.lower();
+ std::string UpperOp = Op.upper();
if (ShiftName != LowerOp && ShiftName != UpperOp) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
return MatchOperand_ParseFail;
@@ -2442,7 +3421,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2520,7 +3500,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2580,7 +3561,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2617,7 +3599,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2649,7 +3632,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2743,7 +3727,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3082,18 +4067,80 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
}
((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
- ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
- // If we have a three-operand form, use that, else the second source operand
- // is just the destination operand again.
- if (Operands.size() == 6)
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- else
- Inst.addOperand(Inst.getOperand(0));
+ // If we have a three-operand form, make sure to set Rn to be the operand
+ // that isn't the same as Rd.
+ unsigned RegOp = 4;
+ if (Operands.size() == 6 &&
+ ((ARMOperand*)Operands[4])->getReg() ==
+ ((ARMOperand*)Operands[3])->getReg())
+ RegOp = 5;
+ ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ Inst.addOperand(Inst.getOperand(0));
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
return true;
}
+bool ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
/// Parse an ARM memory expression; return false if successful, otherwise
/// return true or an error. The first token must be a '[' when called.
bool ARMAsmParser::
@@ -3153,7 +4200,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
unsigned Align = 0;
switch (CE->getValue()) {
default:
- return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ return Error(E,
+ "alignment specifier must be 16, 32, 64, 128, or 256 bits");
+ case 16: Align = 2; break;
+ case 32: Align = 4; break;
case 64: Align = 8; break;
case 128: Align = 16; break;
case 256: Align = 32; break;
@@ -3182,9 +4232,13 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// If we have a '#', it's an immediate offset, else assume it's a register
- // offset.
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat the '#'.
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -3281,7 +4335,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3301,7 +4356,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3328,10 +4384,36 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
/// parseFPImm - A floating point immediate expression operand.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Anything that can accept a floating point constant as an operand
+ // needs to go through here, as the regular ParseExpression is
+ // integer only.
+ //
+ // This routine still creates a generic Immediate operand, containing
+ // a bitcast of the 64-bit floating point value. The various operands
+ // that accept floats can check whether the value is valid for them
+ // via the standard is*() predicates.
+
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
+
+ // Disambiguate the VMOV forms that can accept an FP immediate.
+ // vmov.f32 <sreg>, #imm
+ // vmov.f64 <dreg>, #imm
+ // vmov.f32 <dreg>, #imm @ vector f32x2
+ // vmov.f32 <qreg>, #imm @ vector f32x4
+ //
+ // There are also the NEON VMOV instructions which expect an
+ // integer constant. Make sure we don't try to parse an FPImm
+ // for these:
+ // vmov.i{8|16|32|64} <dreg|qreg>, #imm
+ ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
+ if (!TyOp->isToken() || (TyOp->getToken() != ".f32" &&
+ TyOp->getToken() != ".f64"))
+ return MatchOperand_NoMatch;
+
Parser.Lex(); // Eat the '#'.
// Handle negation, as that still comes through as a separate token.
@@ -3341,34 +4423,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex();
}
const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
- int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ IntVal ^= (uint64_t)isNegative << 31;
Parser.Lex(); // Eat the token.
- if (Val == -1) {
- TokError("floating point value out of range");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(IntVal, getContext()),
+ S, Parser.getTok().getLoc()));
return MatchOperand_Success;
}
+ // Also handle plain integers. Instructions which allow floating point
+ // immediates also allow a raw encoded 8-bit value.
if (Tok.is(AsmToken::Integer)) {
int64_t Val = Tok.getIntVal();
Parser.Lex(); // Eat the token.
if (Val > 255 || Val < 0) {
- TokError("encoded floating point value out of range");
+ Error(Loc, "encoded floating point value out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ double RealVal = ARM_AM::getFPImmFloat(Val);
+ Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue();
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(Val, getContext()), S,
+ Parser.getTok().getLoc()));
return MatchOperand_Success;
}
- TokError("invalid floating point immediate");
+ Error(Loc, "invalid floating point immediate");
return MatchOperand_ParseFail;
}
+
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -3391,7 +4478,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- // If this is VMRS, check for the apsr_nzcv operand.
if (!tryParseRegisterWithWriteBack(Operands))
return false;
int Res = tryParseShiftRegister(Operands);
@@ -3399,17 +4485,21 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
else if (Res == -1) // irrecoverable error
return true;
- if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
S = Parser.getTok().getLoc();
Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
return false;
}
// Fall through for the Identifier case that is not a register or a
// special name.
}
+ case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::String: // quoted label names.
case AsmToken::Dot: { // . as a branch target
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
@@ -3425,6 +4515,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3435,13 +4526,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (getParser().ParseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!CE) {
- Error(S, "constant expression expected");
- return MatchOperand_ParseFail;
+ if (CE) {
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
}
- int32_t Val = CE->getValue();
- if (isNegative && Val == 0)
- ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
@@ -3524,7 +4613,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
- Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
+ Mnemonic == "fmuls")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -3565,7 +4655,11 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
+ Mnemonic == "vfms" || Mnemonic == "vfnms" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -3609,6 +4703,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic == "orr" || Mnemonic == "mvn" ||
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ Mnemonic == "vfm" || Mnemonic == "vfnm" ||
(!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
Mnemonic == "mla" || Mnemonic == "smlal" ||
Mnemonic == "umlal" || Mnemonic == "umull"))) {
@@ -3677,7 +4772,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ ((Mnemonic == "add" && static_cast<ARMOperand*>(Operands[5])->isReg()) ||
static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
return true;
// For Thumb2, add/sub immediate does not have a cc_out operand for the
@@ -3694,9 +4789,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
//
// If either register is a high reg, it's either one of the SP
// variants (handled above) or a 32-bit encoding, so we just
- // check against T3.
+ // check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
return false;
// If both registers are low, we're in an IT block, and the immediate is
@@ -3726,6 +4823,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// remove the cc_out operand.
(!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
!inITBlock() ||
(static_cast<ARMOperand*>(Operands[3])->getReg() !=
static_cast<ARMOperand*>(Operands[5])->getReg() &&
@@ -3733,6 +4831,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->getReg())))
return true;
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock()))
+ return true;
+
// Register-register 'add/sub' for thumb does not have a cc_out operand
@@ -3744,15 +4856,52 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
(Operands.size() == 5 || Operands.size() == 6) &&
static_cast<ARMOperand*>(Operands[3])->isReg() &&
static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ (Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm())))
return true;
return false;
}
+static bool isDataTypeToken(StringRef Tok) {
+ return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
+ Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
+ Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" ||
+ Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" ||
+ Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" ||
+ Tok == ".f" || Tok == ".d";
+}
+
+// FIXME: This bit should probably be handled via an explicit match class
+// in the .td files that matches the suffix instead of having it be
+// a literal string token the way it is now.
+static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
+ return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+}
+
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an ARM instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemonic may include suffixes and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -3854,9 +5003,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Next = Name.find('.', Start + 1);
StringRef ExtraToken = Name.slice(Start, Next);
- // For now, we're only parsing Thumb1 (for the most part), so
- // just ignore ".n" qualifiers. We'll use them to restrict
- // matching when we do Thumb2.
+ // Some NEON instructions have an optional datatype suffix that is
+ // completely ignored. Check for that.
+ if (isDataTypeToken(ExtraToken) &&
+ doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
+ continue;
+
if (ExtraToken != ".n") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
@@ -3941,12 +5093,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -3990,9 +5151,9 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
// the ARMInsts array) instead. Getting that here requires awkward
// API changes, though. Better way?
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
-static MCInstrDesc &getInstDesc(unsigned Opcode) {
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return ARMInsts[Opcode];
}
@@ -4000,13 +5161,14 @@ static MCInstrDesc &getInstDesc(unsigned Opcode) {
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
// Check the IT block state first.
- // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
- // being allowed in IT blocks, but not being predicable. It just always
+ // NOTE: The BKPT instruction has the interesting property of being
+ // allowed in IT blocks, but not being predicable. It just always
// executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
+ Inst.getOpcode() != ARM::BKPT) {
unsigned bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
@@ -4115,16 +5277,21 @@ validateInstruction(MCInst &Inst,
"in register list");
break;
}
+ // Like for ldm/stm, push and pop have hi-reg handling versions in Thumb2,
+ // so only issue a diagnostic for Thumb1. The instructions will be
+ // switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
@@ -4141,10 +5308,1553 @@ validateInstruction(MCInst &Inst,
return false;
}
-void ARMAsmParser::
+static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+ case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+ // VST3LN
+ case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8;
+ case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+ case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+ case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+ case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
+ // VST3
+ case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8;
+ case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+ case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8;
+ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+ case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+ // VST4LN
+ case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8;
+ case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+ case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+ case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+ case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+ // VST4
+ case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8;
+ case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+ case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+ case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8;
+ case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+ case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32;
+ }
+}
+
+static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+ // VLD3DUP
+ case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8;
+ case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+ case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+ case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8;
+ case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+ case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+ // VLD3LN
+ case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8;
+ case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+ case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+ case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+ case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+ // VLD3
+ case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8;
+ case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+ case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8;
+ case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+ case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+ // VLD4LN
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8;
+ case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+ case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+ case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+ // VLD4DUP
+ case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8;
+ case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+ case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+ case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8;
+ case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+ case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+ // VLD4
+ case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8;
+ case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+ case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+ case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8;
+ case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+ case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
+ }
+}
+
+bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Aliases for alternate PC+imm syntax of LDR instructions.
+ case ARM::t2LDRpcrel:
+ Inst.setOpcode(ARM::t2LDRpci);
+ return true;
+ case ARM::t2LDRBpcrel:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ return true;
+ case ARM::t2LDRHpcrel:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ return true;
+ case ARM::t2LDRSBpcrel:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ return true;
+ case ARM::t2LDRSHpcrel:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ return true;
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8:
+ case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8:
+ case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_32:
+ case ARM::VST2LNqWB_register_Asm_16:
+ case ARM::VST2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_register_Asm_8:
+ case ARM::VST3LNdWB_register_Asm_16:
+ case ARM::VST3LNdWB_register_Asm_32:
+ case ARM::VST3LNqWB_register_Asm_16:
+ case ARM::VST3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_register_Asm_8:
+ case ARM::VST4LNdWB_register_Asm_16:
+ case ARM::VST4LNdWB_register_Asm_32:
+ case ARM::VST4LNqWB_register_Asm_16:
+ case ARM::VST4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdWB_fixed_Asm_8:
+ case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8:
+ case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_32:
+ case ARM::VST2LNqWB_fixed_Asm_16:
+ case ARM::VST2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_fixed_Asm_8:
+ case ARM::VST3LNdWB_fixed_Asm_16:
+ case ARM::VST3LNdWB_fixed_Asm_32:
+ case ARM::VST3LNqWB_fixed_Asm_16:
+ case ARM::VST3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_fixed_Asm_8:
+ case ARM::VST4LNdWB_fixed_Asm_16:
+ case ARM::VST4LNdWB_fixed_Asm_32:
+ case ARM::VST4LNqWB_fixed_Asm_16:
+ case ARM::VST4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdAsm_8:
+ case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8:
+ case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_32:
+ case ARM::VST2LNqAsm_16:
+ case ARM::VST2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdAsm_8:
+ case ARM::VST3LNdAsm_16:
+ case ARM::VST3LNdAsm_32:
+ case ARM::VST3LNqAsm_16:
+ case ARM::VST3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdAsm_8:
+ case ARM::VST4LNdAsm_16:
+ case ARM::VST4LNdAsm_32:
+ case ARM::VST4LNqAsm_16:
+ case ARM::VST4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8:
+ case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8:
+ case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_32:
+ case ARM::VLD2LNqWB_register_Asm_16:
+ case ARM::VLD2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_register_Asm_8:
+ case ARM::VLD3LNdWB_register_Asm_16:
+ case ARM::VLD3LNdWB_register_Asm_32:
+ case ARM::VLD3LNqWB_register_Asm_16:
+ case ARM::VLD3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_register_Asm_8:
+ case ARM::VLD4LNdWB_register_Asm_16:
+ case ARM::VLD4LNdWB_register_Asm_32:
+ case ARM::VLD4LNqWB_register_Asm_16:
+ case ARM::VLD4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8:
+ case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8:
+ case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_32:
+ case ARM::VLD2LNqWB_fixed_Asm_16:
+ case ARM::VLD2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_fixed_Asm_8:
+ case ARM::VLD3LNdWB_fixed_Asm_16:
+ case ARM::VLD3LNdWB_fixed_Asm_32:
+ case ARM::VLD3LNqWB_fixed_Asm_16:
+ case ARM::VLD3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_fixed_Asm_8:
+ case ARM::VLD4LNdWB_fixed_Asm_16:
+ case ARM::VLD4LNdWB_fixed_Asm_32:
+ case ARM::VLD4LNqWB_fixed_Asm_16:
+ case ARM::VLD4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8:
+ case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8:
+ case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_32:
+ case ARM::VLD2LNqAsm_16:
+ case ARM::VLD2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdAsm_8:
+ case ARM::VLD3LNdAsm_16:
+ case ARM::VLD3LNdAsm_32:
+ case ARM::VLD3LNqAsm_16:
+ case ARM::VLD3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdAsm_8:
+ case ARM::VLD4LNdAsm_16:
+ case ARM::VLD4LNdAsm_32:
+ case ARM::VLD4LNqAsm_16:
+ case ARM::VLD4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD3DUPdAsm_8:
+ case ARM::VLD3DUPdAsm_16:
+ case ARM::VLD3DUPdAsm_32:
+ case ARM::VLD3DUPqAsm_8:
+ case ARM::VLD3DUPqAsm_16:
+ case ARM::VLD3DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_fixed_Asm_8:
+ case ARM::VLD3DUPdWB_fixed_Asm_16:
+ case ARM::VLD3DUPdWB_fixed_Asm_32:
+ case ARM::VLD3DUPqWB_fixed_Asm_8:
+ case ARM::VLD3DUPqWB_fixed_Asm_16:
+ case ARM::VLD3DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_register_Asm_8:
+ case ARM::VLD3DUPdWB_register_Asm_16:
+ case ARM::VLD3DUPdWB_register_Asm_32:
+ case ARM::VLD3DUPqWB_register_Asm_8:
+ case ARM::VLD3DUPqWB_register_Asm_16:
+ case ARM::VLD3DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3 multiple 3-element structure instructions.
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 4-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8:
+ case ARM::VLD4DUPdAsm_16:
+ case ARM::VLD4DUPdAsm_32:
+ case ARM::VLD4DUPqAsm_8:
+ case ARM::VLD4DUPqAsm_16:
+ case ARM::VLD4DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_fixed_Asm_8:
+ case ARM::VLD4DUPdWB_fixed_Asm_16:
+ case ARM::VLD4DUPdWB_fixed_Asm_32:
+ case ARM::VLD4DUPqWB_fixed_Asm_8:
+ case ARM::VLD4DUPqWB_fixed_Asm_16:
+ case ARM::VLD4DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_register_Asm_8:
+ case ARM::VLD4DUPdWB_register_Asm_16:
+ case ARM::VLD4DUPdWB_register_Asm_32:
+ case ARM::VLD4DUPqWB_register_Asm_8:
+ case ARM::VLD4DUPqWB_register_Asm_16:
+ case ARM::VLD4DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4 multiple 4-element structure instructions.
+ case ARM::VLD4dAsm_8:
+ case ARM::VLD4dAsm_16:
+ case ARM::VLD4dAsm_32:
+ case ARM::VLD4qAsm_8:
+ case ARM::VLD4qAsm_16:
+ case ARM::VLD4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_fixed_Asm_8:
+ case ARM::VLD4dWB_fixed_Asm_16:
+ case ARM::VLD4dWB_fixed_Asm_32:
+ case ARM::VLD4qWB_fixed_Asm_8:
+ case ARM::VLD4qWB_fixed_Asm_16:
+ case ARM::VLD4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_register_Asm_8:
+ case ARM::VLD4dWB_register_Asm_16:
+ case ARM::VLD4dWB_register_Asm_32:
+ case ARM::VLD4qWB_register_Asm_8:
+ case ARM::VLD4qWB_register_Asm_16:
+ case ARM::VLD4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST3 multiple 3-element structure instructions.
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 4-element structure instructions.
+ case ARM::VST4dAsm_8:
+ case ARM::VST4dAsm_16:
+ case ARM::VST4dAsm_32:
+ case ARM::VST4qAsm_8:
+ case ARM::VST4qAsm_16:
+ case ARM::VST4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_fixed_Asm_8:
+ case ARM::VST4dWB_fixed_Asm_16:
+ case ARM::VST4dWB_fixed_Asm_32:
+ case ARM::VST4qWB_fixed_Asm_8:
+ case ARM::VST4qWB_fixed_Asm_16:
+ case ARM::VST4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_register_Asm_8:
+ case ARM::VST4dWB_register_Asm_16:
+ case ARM::VST4dWB_register_Asm_32:
+ case ARM::VST4qWB_register_Asm_8:
+ case ARM::VST4qWB_register_Asm_16:
+ case ARM::VST4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle encoding choice for the shift-immediate instructions.
+ case ARM::t2LSLri:
+ case ARM::t2LSRri:
+ case ARM::t2ASRri: {
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLri: NewOpc = ARM::tLSLri; break;
+ case ARM::t2LSRri: NewOpc = ARM::tLSRri; break;
+ case ARM::t2ASRri: NewOpc = ARM::tASRri; break;
+ }
+ // The Thumb1 operands aren't in the same order. Awesome, eh?
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::ASRi:
+ case ARM::LSRi:
+ case ARM::LSLi:
+ case ARM::RORi: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRi: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLi: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORi: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
+ unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (Opc == ARM::MOVsi)
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDMIA_UPD: {
+ // If this is a load of a single register, then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
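+ // E.g. "ldmia.w r2!, {r3}" is encoded as the post-indexed "ldr r3, [r2], #4".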
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDR_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STMDB_UPD: {
+ // If this is a store of a single register, then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
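+ // E.g. "stmdb.w r2!, {r3}" is encoded as the pre-indexed "str r3, [r2, #-4]!".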
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
@@ -4160,6 +6870,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
case ARM::STMDB_UPD:
@@ -4178,41 +6889,117 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
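+ // E.g. "add r0, r1, #16" uses T3, while "addw r0, r1, #16" keeps T4.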
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
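// E.g. "adds r1, r1, #2" selects T1 (tADDi3); "adds r1, #2" keeps T2.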
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
break;
case ARM::tSUBi8:
// If the immediate is in the range 0-7, we want tSUBi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and
+ // the flags are compatible with the current IT status, use encoding T2
+ // instead of T3. For compatibility with the system 'as'. Make sure the
+ // wide encoding wasn't explicit.
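+ // E.g. outside an IT block, "adds r1, r1, #200" can use the 16-bit tADDi8.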
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
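+ // E.g. "add r0, r0, r8" can use the 16-bit tADDhirr encoding.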
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
Inst.setOpcode(ARM::tBcc);
+ return true;
+ }
break;
case ARM::t2B:
// A Thumb2 conditional branch outside of an IT block is a t2Bcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
Inst.setOpcode(ARM::t2Bcc);
+ return true;
+ }
break;
case ARM::t2Bcc:
// If the conditional is AL or we're in an IT block, we really want t2B.
- if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) {
Inst.setOpcode(ARM::t2B);
+ return true;
+ }
break;
case ARM::tBcc:
// If the conditional is AL, we really want tB.
- if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ if (Inst.getOperand(1).getImm() == ARMCC::AL) {
Inst.setOpcode(ARM::tB);
+ return true;
+ }
break;
case ARM::tLDMIA: {
// If the register list contains any high registers, or if the writeback
@@ -4235,6 +7022,7 @@ processInstruction(MCInst &Inst,
if (hasWritebackToken)
Inst.insert(Inst.begin(),
MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ return true;
}
break;
}
@@ -4248,14 +7036,40 @@ processInstruction(MCInst &Inst,
// 16-bit encoding isn't sufficient. Switch to the 32-bit version.
assert (isThumbTwo());
Inst.setOpcode(ARM::t2STMIA_UPD);
+ return true;
}
break;
}
+ case ARM::tPOP: {
+ bool listContainsBase;
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
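+ // E.g. "pop {r8}" in Thumb2 is rewritten to "ldmia.w sp!, {r8}".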
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2LDMIA_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
+ case ARM::tPUSH: {
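+ // Mirror the tPOP case: a list with high registers (e.g. "push {r8, lr}")
+ // needs the 32-bit t2STMDB_UPD encoding.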
+ bool listContainsBase;
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMDB_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
case ARM::t2MOVi: {
// If we can use the 16-bit encoding and the user didn't explicitly
// request the 32-bit variant, transform it here.
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(1).getImm() <= 255 &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255 &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR) ||
(inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
@@ -4270,6 +7084,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4290,6 +7105,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4320,9 +7136,61 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
+ return true;
}
break;
}
+ case ARM::MOVsi: {
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
+ // Shifting by zero is accepted as a vanilla 'MOVr'
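+ // E.g. "mov r0, r1, lsl #0" is simply "mov r0, r1".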
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ANDrsi:
+ case ARM::ORRrsi:
+ case ARM::EORrsi:
+ case ARM::BICrsi:
+ case ARM::SUBrsi:
+ case ARM::ADDrsi: {
+ unsigned newOpc;
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ANDrsi: newOpc = ARM::ANDrr; break;
+ case ARM::ORRrsi: newOpc = ARM::ORRrr; break;
+ case ARM::EORrsi: newOpc = ARM::EORrr; break;
+ case ARM::BICrsi: newOpc = ARM::BICrr; break;
+ case ARM::SUBrsi: newOpc = ARM::SUBrr; break;
+ case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
+ }
+ // If the shift is by zero, use the non-shifted instruction definition.
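+ // E.g. "and r0, r1, r2, lsl #0" is just "and r0, r1, r2".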
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ITasm:
case ARM::t2IT: {
// The mask bits for all but the first condition are represented as
// the low bit of the condition code value implies 't'. We currently
@@ -4352,13 +7220,14 @@ processInstruction(MCInst &Inst,
break;
}
}
+ return false;
}
unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = getInstDesc(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -4417,14 +7286,23 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// Some instructions need post-processing to, for example, tweak which
- // encoding is selected.
- processInstruction(Inst, Operands);
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2LDR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
// block.
forwardITPosition();
+ // ITasm is an ARM mode pseudo-instruction that just sets the IT block and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -4458,7 +7336,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// parseDirective parses the arm specific directives
@@ -4468,12 +7345,20 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
return true;
}
@@ -4509,9 +7394,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -4521,24 +7419,33 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
+ bool needFuncName = true;
- // Darwin asm has function name after .thumb_func direction
+ // Darwin asm can optionally have the function name after the .thumb_func directive.
// ELF doesn't
if (isMachO) {
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
- return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getString();
- Parser.Lex(); // Consume the identifier token.
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getIdentifier();
+ Parser.Lex(); // Consume the identifier token.
+ needFuncName = false;
+ }
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- Parser.Lex();
+
+ // Eat the end of statement and any blank lines that follow.
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex();
// FIXME: assuming function name will be the line following .thumb_func
- if (!isMachO) {
- Name = Parser.getTok().getString();
+ // We really should be checking the next symbol definition even if there's
+ // stuff in between.
+ if (needFuncName) {
+ Name = Parser.getTok().getIdentifier();
}
// Mark symbol as a thumb symbol.
@@ -4601,6 +7508,57 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
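+/// e.g. "fp .req r11" makes "fp" an alias for register r11.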
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
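+/// e.g. ".unreq fp" removes an alias previously created with ".req".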
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ return true;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ return true;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39debc3..e24a1b17867a 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..f0184b675dac
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmParser
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC MCParser Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index f045e839a616..9a2aab5304ab 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,18 +1,18 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
-llvm_tablegen(ARMGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(ARMGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(ARMGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-llvm_tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
-llvm_tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(ARMGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(ARMGenFastISel.inc -gen-fast-isel)
-llvm_tablegen(ARMGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-llvm_tablegen(ARMGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
+tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
@@ -26,7 +26,6 @@ add_llvm_target(ARMCodeGen
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFrameLowering.cpp
- ARMGlobalMerge.cpp
ARMHazardRecognizer.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
@@ -34,6 +33,7 @@ add_llvm_target(ARMCodeGen
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
+ ARMMachineFunctionInfo.cpp
ARMRegisterInfo.cpp
ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
@@ -49,22 +49,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-# workaround for hanging compilation on MSVC10
-if( MSVC_VERSION EQUAL 1600 )
+# workaround for hanging compilation on MSVC9, 10
+if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
SOURCE ARMISelLowering.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8f2f813b676a..2f504b756b1b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,17 +9,16 @@
#define DEBUG_TYPE "arm-disassembler"
-#include "ARM.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,7 +51,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
};
@@ -77,7 +76,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
mutable std::vector<unsigned> ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -97,224 +96,236 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
Out = In;
return false;
}
- return false;
+ llvm_unreachable("Invalid DecodeStatus!");
}
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-
-
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
@@ -327,11 +338,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-EDInstInfo *ARMDisassembler::getEDInfo() const {
+const EDInstInfo *ARMDisassembler::getEDInfo() const {
return instInfoARM;
}
-EDInstInfo *ThumbDisassembler::getEDInfo() const {
+const EDInstInfo *ThumbDisassembler::getEDInfo() const {
return instInfoARM;
}
@@ -415,7 +426,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
@@ -435,40 +446,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
- if (!getOpInfo)
- return false;
-
struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = Dis->getDisInfoBlock();
- if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- if (isBranch) {
- LLVMSymbolLookupCallback SymbolLookUp =
- Dis->getLLVMSymbolLookupCallback();
- if (SymbolLookUp) {
- uint64_t ReferenceType;
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = true;
- SymbolicOp.Value = 0;
- }
- else {
- SymbolicOp.Value = Value;
- }
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
- }
- else {
- return false;
- }
- }
- else {
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
}
+ // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
}
MCContext *Ctx = Dis->getMCContext();
@@ -527,8 +536,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- assert(0 && "bad SymbolicOp.VariantKind");
+ else
+ llvm_unreachable("bad SymbolicOp.VariantKind");
return true;
}
@@ -543,7 +552,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
- const void *Decoder) {
+ const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (SymbolLookUp) {
@@ -841,14 +850,14 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10, ARM::R11,
ARM::R12, ARM::SP, ARM::LR, ARM::PC
};
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -859,20 +868,26 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
}
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
switch (RegNo) {
@@ -902,13 +917,13 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -919,7 +934,7 @@ static const unsigned SPRDecoderTable[] = {
ARM::S28, ARM::S29, ARM::S30, ARM::S31
};
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -929,7 +944,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
@@ -940,7 +955,7 @@ static const unsigned DPRDecoderTable[] = {
ARM::D28, ARM::D29, ARM::D30, ARM::D31
};
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -950,7 +965,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -958,14 +973,14 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
@@ -973,7 +988,7 @@ static const unsigned QPRDecoderTable[] = {
};
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -984,7 +999,49 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static const uint16_t DPairDecoderTable[] = {
+ ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
+ ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
+ ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
+ ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
+ ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
+ ARM::Q15
+};
+
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairSpacedDecoderTable[] = {
+ ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
+ ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
+ ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
+ ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17,
+ ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21,
+ ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25,
+ ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29,
+ ARM::D28_D30, ARM::D29_D31
+};
+
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 29)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairSpacedDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
@@ -998,7 +1055,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val)
Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
@@ -1007,7 +1064,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
uint32_t imm = Val & 0xFF;
uint32_t rot = (Val & 0xF00) >> 7;
@@ -1016,7 +1073,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1053,7 +1110,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1088,7 +1145,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1123,7 +1180,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1140,7 +1197,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1157,7 +1214,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
// This operand encodes a mask of contiguous zeros between a specified MSB
// and LSB. To decode it, we create the mask of all bits MSB-and-lower,
@@ -1178,7 +1235,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1234,16 +1291,6 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
-
- bool writeback = (P == 0) || (W == 1);
- unsigned idx_mode = 0;
- if (P && writeback)
- idx_mode = ARMII::IndexModePre;
- else if (!P && writeback)
- idx_mode = ARMII::IndexModePost;
-
switch (Inst.getOpcode()) {
case ARM::t2LDC2_OFFSET:
case ARM::t2LDC2L_OFFSET:
@@ -1333,7 +1380,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1436,7 +1483,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1477,7 +1524,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1490,6 +1537,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
unsigned W = fieldFromInstruction32(Insn, 21, 1);
unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt2 = Rt + 1;
bool writeback = (W == 1) | (P == 0);
@@ -1501,7 +1549,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (Rt & 0x1) return MCDisassembler::Fail;
+ if (Rt & 0x1) S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
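+  // Additional per-opcode operand checks; violations are reported as soft
+  // failures so that decoding can continue.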
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+
+ if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && fieldFromInstruction32(Insn, 8, 4))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+    if (type && Rn == 15) {
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && Rn == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+    if (type && Rn == 15) {
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+    if (type && Rn == 15) {
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (type && (Rt == 15 || (writeback && Rn == Rt)))
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt == 15 || Rm == 15))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
break;
default:
break;
@@ -1588,7 +1715,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1617,7 +1744,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1702,7 +1829,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
return S;
}
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 18, 2);
unsigned M = fieldFromInstruction32(Insn, 17, 1);
@@ -1742,7 +1869,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 9, 2);
unsigned M = fieldFromInstruction32(Insn, 8, 1);
@@ -1782,7 +1909,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1806,7 +1933,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1832,7 +1959,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1860,7 +1987,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1880,7 +2007,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1899,13 +2026,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
}
static DecodeStatus
-DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
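+  // Reassemble the branch offset from its scattered encoding fields before
+  // sign-extending the halfword-aligned result.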
+ unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
+ (fieldFromInstruction32(Insn, 11, 1) << 18) |
+ (fieldFromInstruction32(Insn, 13, 1) << 17) |
+ (fieldFromInstruction32(Insn, 16, 6) << 11) |
+ (fieldFromInstruction32(Insn, 26, 1) << 19);
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
+ return S;
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1915,12 +2057,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
return S;
}
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
- 4, Inst, Decoder))
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1929,13 +2073,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1952,7 +2090,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1964,47 +2102,38 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
// First output register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second output register
switch (Inst.getOpcode()) {
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2020,12 +2149,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VLD2b8:
- case ARM::VLD2b16:
- case ARM::VLD2b32:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
case ARM::VLD3q8:
case ARM::VLD3q16:
case ARM::VLD3q32:
@@ -2046,28 +2169,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third output register
switch(Inst.getOpcode()) {
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2104,20 +2205,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth output register
switch (Inst.getOpcode()) {
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
@@ -2142,31 +2229,58 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback operand
switch (Inst.getOpcode()) {
- case ARM::VLD1d8_UPD:
- case ARM::VLD1d16_UPD:
- case ARM::VLD1d32_UPD:
- case ARM::VLD1d64_UPD:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d8Twb_register:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d16Twb_register:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d32Twb_register:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d8Qwb_register:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d16Qwb_register:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d32Qwb_register:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
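+    // For the wb_fixed/wb_register forms, the writeback operand is
+    // represented here by a placeholder immediate.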
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2191,17 +2305,66 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ default:
+    // The cases below have been updated to use an explicit am6offset
+    // operand that is split between fixed and register offset forms. For
+    // those instructions not yet updated, we need to add an additional
+    // reg0 operand for the fixed variant.
+ //
+ // The fixed offset encodes as Rm == 0xd, so we check for that.
+ if (Rm == 0xd) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ break;
+ }
+ // Fall through to handle the register offset variant.
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ break;
}
return S;
}
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2214,31 +2377,60 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback Operand
switch (Inst.getOpcode()) {
- case ARM::VST1d8_UPD:
- case ARM::VST1d16_UPD:
- case ARM::VST1d32_UPD:
- case ARM::VST1d64_UPD:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1d8wb_register:
+ case ARM::VST1d16wb_register:
+ case ARM::VST1d32wb_register:
+ case ARM::VST1d64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
+ if (Rm == 0xF)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2263,55 +2455,89 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ default:
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ break;
}
+
// First input register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2327,12 +2553,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2354,28 +2574,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2412,20 +2610,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2451,7 +2635,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2461,15 +2645,21 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -2480,17 +2670,17 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2500,18 +2690,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size;
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
@@ -2523,10 +2728,13 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
return S;
}
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2561,7 +2769,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2614,7 +2822,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2659,7 +2867,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2678,31 +2886,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(8 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(16 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(32 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(64 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2713,7 +2921,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2722,9 +2929,15 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VTBL2:
+ case ARM::VTBX2:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2733,7 +2946,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2757,25 +2970,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2790,7 +3009,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2804,7 +3023,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned imm = Val << 2;
@@ -2814,7 +3033,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -2822,7 +3041,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2839,7 +3058,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2894,7 +3113,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
@@ -2903,7 +3122,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2918,7 +3137,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2933,7 +3152,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
return S;
}
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (Val == 0)
@@ -2946,7 +3165,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2977,7 +3196,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3007,7 +3226,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3022,7 +3241,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imm = fieldFromInstruction16(Insn, 0, 7);
@@ -3033,7 +3252,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3058,7 +3277,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction16(Insn, 0, 3);
@@ -3069,29 +3288,29 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned add = fieldFromInstruction32(Insn, 4, 1);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(add));
return S;
}
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address,
+ if (!tryAddingSymbolicOperand(Address,
(Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
@@ -3101,7 +3320,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3117,7 +3336,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3159,7 +3378,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
// Decode a shifted immediate operand. These basically consist
// of an 8-bit value, and a 4-bit directive that specifies either
// a splat operation or a rotation.
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
if (ctrl == 0) {
@@ -3191,19 +3410,23 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
switch (Val) {
default:
@@ -3223,14 +3446,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!Val) return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3253,7 +3476,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
@@ -3280,7 +3503,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3305,7 +3528,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3333,7 +3556,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3358,7 +3581,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3383,7 +3606,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3442,7 +3665,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3500,7 +3723,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3567,7 +3790,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3631,7 +3854,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3701,7 +3924,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3765,7 +3988,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3839,7 +4062,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3904,7 +4127,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3930,7 +4153,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3956,7 +4179,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction16(Insn, 4, 4);
@@ -3983,7 +4206,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4020,7 +4243,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4054,7 +4277,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
uint64_t Address, const void *Decoder) {
unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
@@ -4069,7 +4292,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
uint64_t Address,
const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4080,3 +4303,109 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
return S;
}
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
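+  // A condition field of 0b1111 is not a SWP/SWPB encoding; defer to the
+  // CPS decoder instead.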
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv2f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv2f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
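+  // The immediate operand is 64 - imm6 (the fraction-bit count); imm6 values
+  // below 32 would imply more than 32 fraction bits, so flag a soft failure.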
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv4f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv4f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+
+ if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
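The new VCVT decoders above pull the register and fraction-bit fields out of the instruction word and emit the fractional-bits operand as 64 - imm, soft-failing when bit 5 of the 6-bit imm field is clear. A minimal standalone sketch of that field arithmetic, assuming a hand-rolled field() helper and a made-up bit pattern (neither comes from the patch):

#include <cstdint>
#include <cstdio>

// Standalone helper mirroring what the decoder's fieldFromInstruction32() does:
// extract 'numBits' bits of 'insn' starting at bit 'startBit'.
static unsigned field(uint32_t insn, unsigned startBit, unsigned numBits) {
  return (insn >> startBit) & ((1u << numBits) - 1);
}

int main() {
  // Hypothetical 32-bit instruction word, for illustration only.
  uint32_t insn = 0xF3BB0F10;
  unsigned Vd  = field(insn, 12, 4) | (field(insn, 22, 1) << 4);
  unsigned Vm  = field(insn,  0, 4) | (field(insn,  5, 1) << 4);
  unsigned imm = field(insn, 16, 6);

  // The decoder emits the fractional-bits operand as 64 - imm, and flags a
  // soft failure when bit 5 of the 6-bit field is clear (imm < 32).
  bool softFail = !(imm & 0x20);
  std::printf("Vd=%u Vm=%u fbits=%u softFail=%d\n", Vd, Vm, 64 - imm, softFail);
  return 0;
}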
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751150e8..9de6e5c511bd 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..52d833893270
--- /dev/null
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDisassembler
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index ccdac3ebeb47..b3eeafe08314 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -18,11 +18,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
@@ -36,16 +36,14 @@ static unsigned translateShiftImm(unsigned imm) {
ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI) {
+ MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
-StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
@@ -101,7 +99,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.123 PUSH
if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2STMDB_UPD)
@@ -122,7 +122,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.122 POP
if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2LDMIA_UPD)
@@ -250,7 +252,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc == ARM_AM::rrx)
return;
-
+
O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
}
@@ -433,6 +435,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, O);
+ return;
+ }
+
const MCOperand &MO3 = MI->getOperand(Op+2);
unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
@@ -636,7 +644,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (getAvailableFeatures() & ARM::FeatureMClass) {
switch (Op.getImm()) {
- default: assert(0 && "Unexpected mask value!");
+ default: llvm_unreachable("Unexpected mask value!");
case 0: O << "apsr"; return;
case 1: O << "iapsr"; return;
case 2: O << "eapsr"; return;
@@ -659,12 +667,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: assert(0);
+ default: llvm_unreachable("Unexpected mask value!");
case 4: O << "g"; return;
case 8: O << "nzcvq"; return;
case 12: O << "nzcvqg"; return;
}
- llvm_unreachable("Unexpected mask value!");
}
if (SpecRegRBit)
@@ -684,7 +691,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
- if (CC != ARMCC::AL)
+ // Handle the undefined 15 CC value here for printing so we don't abort().
+ if ((unsigned)CC == 15)
+ O << "<und>";
+ else if (CC != ARMCC::AL)
O << ARMCondCodeToString(CC);
}
@@ -882,6 +892,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
O << "[" << getRegisterName(MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm() / 4;
@@ -963,7 +978,8 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x" << utohexstr(Val);
+ O << "#0x";
+ O.write_hex(Val);
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
@@ -986,7 +1002,153 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
}
}
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 16 - MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 32 - MI->getOperand(OpNum).getImm();
+}
+
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
+
+void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}";
+}
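The printVectorList* printers added above all emit the same shape of operand: a brace-enclosed run of D registers, stepping by one or two registers and optionally tagging each with [] for the all-lanes forms. A small sketch of that formatting idea, with a hypothetical formatVectorList() helper standing in for the real printers:

#include <cstdio>
#include <string>

// Illustrative sketch (not part of the patch): build a NEON register-list
// string such as "{d0, d1, d2}" or "{d1[], d3[]}".  'base' is the first D
// register number, 'stride' is 1 for consecutive lists and 2 for the
// "Spaced" variants, 'allLanes' appends "[]" to each element.
static std::string formatVectorList(unsigned base, unsigned count,
                                    unsigned stride, bool allLanes) {
  std::string out = "{";
  for (unsigned i = 0; i != count; ++i) {
    if (i) out += ", ";
    out += "d" + std::to_string(base + i * stride);
    if (allLanes) out += "[]";
  }
  out += "}";
  return out;
}

int main() {
  std::printf("%s\n", formatVectorList(0, 3, 1, false).c_str()); // {d0, d1, d2}
  std::printf("%s\n", formatVectorList(1, 2, 2, true).c_str());  // {d1[], d3[]}
  return 0;
}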
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 5c2173fcde62..8acb7eef019b 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,12 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- static const char *getInstructionName(unsigned Opcode);
-
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -128,7 +126,33 @@ public:
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b4957ccde..e2d4819b4b4a 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..6f4fa365358c
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmPrinter
+parent = ARM
+required_libraries = MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
new file mode 100644
index 000000000000..fd4b3a33de1a
--- /dev/null
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -0,0 +1,35 @@
+;===- ./lib/Target/ARM/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = ARM
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = ARMCodeGen
+parent = ARM
+required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 9982fa68a578..62473b2bfdee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -1,4 +1,4 @@
-//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -43,7 +44,7 @@ namespace ARM_AM {
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return "asr";
case ARM_AM::lsl: return "lsl";
case ARM_AM::lsr: return "lsr";
@@ -54,7 +55,7 @@ namespace ARM_AM {
static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return 2;
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -72,7 +73,7 @@ namespace ARM_AM {
static inline const char *getAMSubModeStr(AMSubMode Mode) {
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::ia: return "ia";
case ARM_AM::ib: return "ib";
case ARM_AM::da: return "da";
@@ -569,7 +570,7 @@ namespace ARM_AM {
}
EltBits = 64;
} else {
- assert(false && "Unsupported NEON immediate");
+ llvm_unreachable("Unsupported NEON immediate");
}
return Val;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index c31c5e6b8452..d10bfc104a3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -11,17 +11,18 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -31,8 +32,8 @@ using namespace llvm;
namespace {
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
- ARMELFObjectWriter(Triple::OSType OSType)
- : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
};
@@ -60,15 +61,16 @@ public:
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
@@ -76,6 +78,9 @@ public:
{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -100,13 +105,50 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
- void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ void handleAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
default: break;
case MCAF_Code16:
@@ -124,21 +166,81 @@ public:
};
} // end anonymous namespace
-bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ case ARM::tLDRpciASM: return ARM::t2LDRpci;
+ case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
+ }
+}
+
+bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
-void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
+ case ARM::fixup_arm_thumb_bcc: {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp: {
+ // If the immediate is negative, greater than 1020, or not a multiple
+ // of four, the wide version of the instruction must be used.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 1020 || Offset < 0 || Offset & 3;
+ }
+ }
+ llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+}
+
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
-bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
- const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
: Thumb1_16bitNopEncoding;
@@ -269,6 +371,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
// These values don't encode the low two bits since they're always zero.
// Offset by 8 just as above.
return 0xffffff & ((Value - 8) >> 2);
@@ -359,6 +464,19 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10_unscaled: {
+ Value = Value - 8; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value = (Value & 0xf) | ((Value & 0xf0) << 4);
+ return Value | (isAdd << 23);
+ }
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
@@ -376,8 +494,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
assert ((Value < 256) && "Out of range pc-relative fixup value!");
Value |= isAdd << 23;
- // Same addressing mode as fixup_arm_pcrel_10,
- // but with 16-bit halfwords swapped.
+ // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
+ // swapped.
if (Kind == ARM::fixup_t2_pcrel_10) {
uint32_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -395,22 +513,21 @@ namespace {
// ELF is an ELF of course...
class ELFARMAsmBackend : public ARMAsmBackend {
public:
- Triple::OSType OSType;
+ uint8_t OSABI;
ELFARMAsmBackend(const Target &T, const StringRef TT,
- Triple::OSType _OSType)
- : ARMAsmBackend(T, TT), OSType(_OSType) { }
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
- /*IsLittleEndian*/ true);
+ return createARMELFObjectWriter(OS, OSABI);
}
};
// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = 4; // FIXME: 2 for Thumb
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -439,7 +556,7 @@ public:
Subtype);
}
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -464,9 +581,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_thumb_cb:
return 2;
+ case ARM::fixup_arm_pcrel_10_unscaled:
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
return 3;
@@ -491,7 +612,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -527,5 +648,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on ARM");
- return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI);
}
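fixupNeedsRelaxation() above decides between the 16-bit and 32-bit Thumb encodings purely from the branch displacement: the implied +4 is subtracted, then the result is tested against the reach of tB (about +/-2 KiB), tBcc (about +/-256 bytes), or the literal-load/ADR form (0..1020, word aligned). A re-statement of those range checks as free functions is sketched below; the function names and example values are chosen here and are not part of the patch.

#include <cstdint>
#include <cstdio>

// 'value' is the raw fixup value before the implied +4 branch offset is removed.
static bool tB_needsRelax(uint64_t value) {           // tB -> t2B
  int64_t offset = int64_t(value) - 4;
  return offset > 2046 || offset < -2048;             // outside signed 12-bit range, bit 0 == 0
}
static bool tBcc_needsRelax(uint64_t value) {         // tBcc -> t2Bcc
  int64_t offset = int64_t(value) - 4;
  return offset > 254 || offset < -256;               // outside signed 9-bit range, bit 0 == 0
}
static bool thumbCp_needsRelax(uint64_t value) {      // literal load / ADR
  int64_t offset = int64_t(value) - 4;
  return offset > 1020 || offset < 0 || (offset & 3); // must be 0..1020 and word aligned
}

int main() {
  std::printf("tB   +2000: %d\n", tB_needsRelax(2004));     // in range      -> 0
  std::printf("tB   +4000: %d\n", tB_needsRelax(4004));     // too far       -> 1
  std::printf("tBcc  +300: %d\n", tBcc_needsRelax(304));    // too far       -> 1
  std::printf("cp    +102: %d\n", thumbCp_needsRelax(106)); // not aligned   -> 1
  return 0;
}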
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ec4b6ffcfe83..ae11be888137 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -67,7 +67,6 @@ namespace ARMCC {
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
case ARMCC::EQ: return "eq";
case ARMCC::NE: return "ne";
case ARMCC::HS: return "hs";
@@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
case ARMCC::LE: return "le";
case ARMCC::AL: return "al";
}
+ llvm_unreachable("Unknown condition code");
}
namespace ARM_PROC {
@@ -185,6 +185,39 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S29: case D29: return 29;
case S30: case D30: return 30;
case S31: case D31: return 31;
+
+ // Composite registers use the regnum of the first register in the list.
+ /* Q0 */ case D0_D2: return 0;
+ case D1_D2: case D1_D3: return 1;
+ /* Q1 */ case D2_D4: return 2;
+ case D3_D4: case D3_D5: return 3;
+ /* Q2 */ case D4_D6: return 4;
+ case D5_D6: case D5_D7: return 5;
+ /* Q3 */ case D6_D8: return 6;
+ case D7_D8: case D7_D9: return 7;
+ /* Q4 */ case D8_D10: return 8;
+ case D9_D10: case D9_D11: return 9;
+ /* Q5 */ case D10_D12: return 10;
+ case D11_D12: case D11_D13: return 11;
+ /* Q6 */ case D12_D14: return 12;
+ case D13_D14: case D13_D15: return 13;
+ /* Q7 */ case D14_D16: return 14;
+ case D15_D16: case D15_D17: return 15;
+ /* Q8 */ case D16_D18: return 16;
+ case D17_D18: case D17_D19: return 17;
+ /* Q9 */ case D18_D20: return 18;
+ case D19_D20: case D19_D21: return 19;
+ /* Q10 */ case D20_D22: return 20;
+ case D21_D22: case D21_D23: return 21;
+ /* Q11 */ case D22_D24: return 22;
+ case D23_D24: case D23_D25: return 23;
+ /* Q12 */ case D24_D26: return 24;
+ case D25_D26: case D25_D27: return 25;
+ /* Q13 */ case D26_D28: return 26;
+ case D27_D28: case D27_D29: return 27;
+ /* Q14 */ case D28_D30: return 28;
+ case D29_D30: case D29_D31: return 29;
+ /* Q15 */
}
}
@@ -237,7 +270,6 @@ namespace ARMII {
inline static const char *AddrModeToString(AddrMode addrmode) {
switch (addrmode) {
- default: llvm_unreachable("Unknown memory operation");
case AddrModeNone: return "AddrModeNone";
case AddrMode1: return "AddrMode1";
case AddrMode2: return "AddrMode2";
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
new file mode 100644
index 000000000000..aa649badaf82
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -0,0 +1,283 @@
+//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+ enum { DefaultEABIVersion = 0x05000000U };
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+
+ public:
+ ARMELFObjectWriter(uint8_t OSABI);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ };
+}
+
+ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
+ ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+
+ARMELFObjectWriter::~ARMELFObjectWriter() {}
+
+// FIXME: get the real EABI Version from the Triple.
+unsigned ARMELFObjectWriter::getEFlags() const {
+ return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
+}
+
+// In ARM, _MergedGlobals and most other symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) { ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return GetRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ llvm_unreachable("unimplemented");
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET1:
+ Type = ELF::R_ARM_TARGET1;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ llvm_unreachable("Unimplemented");
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 350c92decdce..0085feb82069 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,9 @@ enum Fixups {
// the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
+ // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
+ // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ fixup_arm_pcrel_10_unscaled,
// fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
// used in VFP instructions where the lower 2 bits are not encoded
// (so it's encoded as an 8-bit immediate).
@@ -56,6 +59,25 @@ enum Fixups {
// fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
+ // The following fixups handle the ARM BL instructions. These can be
+ // conditionalised; however, the ARM ELF ABI requires a different relocation
+ // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that
+ // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has
+ // no conditional version; R_ARM_JUMP24 would have to insert a veneer.
+ //
+ // MachO does not draw a distinction between the two cases, so it will treat
+ // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
+
+ // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ fixup_arm_uncondbl,
+
+ // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
+ // conditionalisation.
+ fixup_arm_condbl,
+
+ // fixup_arm_blx - Fixup for ARM BLX instructions.
+ fixup_arm_blx,
+
// fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
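The comment block above explains the reason for the uncondbl/condbl split: R_ARM_CALL lets the linker rewrite BL to BLX, which has no conditional form, so a conditional BL has to use R_ARM_JUMP24 instead. A toy sketch of that selection, using stand-in enum values rather than LLVM's ELF constants:

#include <cstdio>

// Illustrative only; these enumerators are stand-ins, not LLVM's definitions.
enum RelocType { R_ARM_CALL, R_ARM_JUMP24, R_ARM_PLT32 };

// A conditional BL must use R_ARM_JUMP24, because R_ARM_CALL permits the
// linker to rewrite the instruction to BLX and BLX cannot be conditional.
static RelocType relocForARMBranchAndLink(bool isConditional, bool viaPLT) {
  if (isConditional)
    return R_ARM_JUMP24;
  return viaPLT ? R_ARM_PLT32 : R_ARM_CALL;
}

int main() {
  std::printf("bl   foo      -> %d\n", relocForARMBranchAndLink(false, false)); // R_ARM_CALL
  std::printf("blne foo      -> %d\n", relocForARMBranchAndLink(true, false));  // R_ARM_JUMP24
  std::printf("bl   foo(PLT) -> %d\n", relocForARMBranchAndLink(false, true));  // R_ARM_PLT32
  return 0;
}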
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1c109e015280..03e8d5f83ae7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = {
0,0
};
+void ARMMCAsmInfoDarwin::anchor() { }
+
ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
@@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
ExceptionsType = ExceptionHandling::SjLj;
}
+void ARMELFMCAsmInfo::anchor() { }
+
ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 90f7822ea580..f0b289c6f3b6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,11 +18,15 @@
namespace llvm {
- struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit ARMMCAsmInfoDarwin();
};
- struct ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit ARMELFMCAsmInfo();
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e22b842..10d1c48876ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -64,7 +64,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI,
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -118,8 +118,10 @@ public:
/// branch target.
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups) const;
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate
/// ADR label target.
@@ -166,7 +168,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::da: return 0;
case ARM_AM::ia: return 1;
case ARM_AM::db: return 2;
@@ -177,7 +179,6 @@ public:
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
switch (ShOpc) {
- default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::no_shift:
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -185,7 +186,7 @@ public:
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
+ llvm_unreachable("Invalid ShiftOpc!");
}
/// getAddrMode2OpValue - Return encoding for addrmode2 operands.
@@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
llvm_unreachable("Unable to encode MCOperand!");
- return 0;
}
/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
@@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected branch target type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
// All of the information is in the fixup.
return 0;
@@ -594,17 +594,26 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
-getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_condbranch, Fixups);
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_uncondbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbl, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups);
}
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups);
+
return MO.getImm() >> 1;
}
@@ -718,12 +727,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
else
Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
+ // FIXME: Handle #-0.
if (Offset < 0) {
Offset *= -1;
isAdd = false;
@@ -791,8 +801,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else
@@ -833,7 +843,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// but this is good enough for now.
static bool EvaluateAsPCRel(const MCExpr *Expr) {
switch (Expr->getKind()) {
- default: assert(0 && "Unexpected expression type");
+ default: llvm_unreachable("Unexpected expression type");
case MCExpr::SymbolRef: return false;
case MCExpr::Binary: return true;
}
@@ -857,7 +867,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind;
switch (ARM16Expr->getKind()) {
- default: assert(0 && "Unsupported ARMFixup");
+ default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
if (!isTargetDarwin() && EvaluateAsPCRel(E))
Kind = MCFixupKind(isThumb2()
@@ -879,12 +889,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
: ARM::fixup_arm_movw_lo16);
break;
}
- Fixups.push_back(MCFixup::Create(0, E, Kind));
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
};
llvm_unreachable("Unsupported MCExpr type in MCOperand!");
- return 0;
}
uint32_t ARMMCCodeEmitter::
@@ -993,6 +1002,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+
+ // If the first operand isn't a register, we have a label reference.
+ if (!MO.isReg()) {
+ unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ return (Rn << 9) | (1 << 13);
+ }
unsigned Rn = getARMRegisterNumbering(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
@@ -1066,7 +1088,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
else
Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
@@ -1312,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
// LDM/STM:
// {15-0} = Bitfield of GPRs.
unsigned Reg = MI.getOperand(Op).getReg();
- bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
- bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+ bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
unsigned Binary = 0;
@@ -1372,11 +1394,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
switch (Imm.getImm()) {
default: break;
- case 2:
- case 4:
case 8:
- case 16: Align = 0x00; break;
- case 32: Align = 0x03; break;
+ case 16:
+ case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break;
+ case 4: Align = 0x03; break;
}
return RegNo | (Align << 4);
@@ -1412,7 +1434,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 2727ba8c8aa5..22e14a2281de 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
switch (Kind) {
- default: assert(0 && "Invalid kind!");
+ default: llvm_unreachable("Invalid kind!");
case VK_ARM_HI16: OS << ":upper16:"; break;
case VK_ARM_LO16: OS << ":lower16:"; break;
}
@@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
switch (Value->getKind()) {
case MCExpr::Target:
- assert(0 && "Can't handle nested target expr!");
- break;
+ llvm_unreachable("Can't handle nested target expr!");
case MCExpr::Constant:
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 0a2e883deb1d..a727e087d291 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -1,4 +1,4 @@
-//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index a55c41075d40..e3512cda3ae3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -89,14 +89,6 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
ARMArchFeature += ",+thumb-mode";
}
- Triple TheTriple(TT);
- if (TheTriple.getOS() == Triple::NativeClient) {
- if (ARMArchFeature.empty())
- ARMArchFeature = "+nacl-mode";
- else
- ARMArchFeature += ",+nacl-mode";
- }
-
return ARMArchFeature;
}
@@ -137,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -158,22 +151,23 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
- return NULL;
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI, STI);
return 0;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 9b3d3bd32183..88472d7ffc3f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+/// createARMELFObjectWriter - Construct an ARM ELF object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
bool Is64Bit,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e84df1..8057cb6687a6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -13,9 +13,11 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -80,6 +82,9 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_adr_pcrel_12:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
@@ -98,34 +103,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
+ // For movw/movt r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ Log2Size = 2;
return true;
}
}
void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = macho::RIT_ARM_Half;
@@ -135,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -148,7 +167,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -178,9 +198,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +216,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
@@ -233,7 +259,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -245,7 +272,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -287,19 +315,21 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = macho::RIT_Vanilla;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
- report_fatal_error("unknown ARM fixup kind!");
- return;
- }
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ // If we failed to get fixup kind info, it's because there's no legal
+ // relocation type for the fixup kind. This happens when it's a fixup that's
+ // expected to always be resolvable at assembly time and not have any
+ // relocations needed.
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
}
@@ -374,6 +404,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt relocation.
+ uint32_t Value = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
Writer->addRelocation(Fragment->getParent(), MRE);
}
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index adc37cbf582e..256599412e8b 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,19 +1,14 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
+ ARMELFObjectWriter.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMMCTargetDesc.cpp
ARMMachObjectWriter.cpp
+ ARMELFObjectWriter.cpp
)
add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMInfo
- LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..2a7fe6188b50
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDesc
+parent = ARM
+required_libraries = ARMAsmPrinter ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df00538b39f..28998361c7a0 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,4 +1,4 @@
-//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 2f6842e8cb60..3eddda812f84 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions.
//===---------------------------------------------------------------------===//
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting
LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
@@ -699,3 +694,19 @@ test is equality test so it's more a conditional move rather than a select:
Currently this is a ARM specific dag combine. We probably should make it into a
target-neutral one.
+
+//===---------------------------------------------------------------------===//
+
+Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
+are specified to be undefined at zero, so portable code must check for zero
+and handle it as a special case. That is unnecessary on ARM where those
+operations are implemented in a way that is well-defined for zero. For
+example:
+
+int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
+
+should just be implemented with a CLZ instruction. Since there are other
+targets, e.g., PPC, that share this behavior, it would be best to implement
+this in a target-independent way: we should probably fold that (when using
+"undefined at zero" semantics) to set the "defined at zero" bit and have
+the code generator expand out the right code.
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b136ce64..533e747894ca 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..a07a94047d4e
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMInfo
+parent = ARM
+required_libraries = MC Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index d8481778c0da..edd73c20c0be 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1,4 +1,4 @@
-//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -101,7 +100,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -175,14 +174,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
return false;
}
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
@@ -214,7 +213,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
int NumBytes = (int)MFI->getStackSize();
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
@@ -278,8 +277,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill));
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB->copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -350,6 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MIB->copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 218311d78d30..e03e75815c7e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,14 +13,11 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
-#include "Thumb1InstrInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -28,6 +25,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tMOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
@@ -60,8 +66,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -89,8 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 17ef2f758ef4..36af20492d4e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB1INSTRUCTIONINFO_H
#define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
namespace llvm {
@@ -27,6 +26,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482a66fa..ef77bbd21a4e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
@@ -570,6 +570,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
+ UseMI = II;
+ done = true;
+ break;
+ }
if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
@@ -624,6 +629,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
FrameReg = BasePtr;
}
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
@@ -643,14 +663,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +692,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -695,11 +714,11 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else {
- assert(false && "Unexpected opcode!");
+ llvm_unreachable("Unexpected opcode!");
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59e5980..69718424e735 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB1REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b6274007b237..ecb4c2f0e5da 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
Uses.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Uses.insert(*Subreg);
}
@@ -83,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
Defs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -153,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
++I;
if (I != E) {
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
if (NCC == CC || NCC == OCC)
return true;
}
@@ -170,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -206,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MI = NMI;
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
// Add implicit use of ITSTATE.
@@ -237,6 +238,10 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ MachineBasicBlock::instr_iterator LI = LastITMI;
+ finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI));
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf040c822de9..8ab486b0c1bf 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,11 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -35,6 +33,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tNOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
// FIXME
return 0;
@@ -53,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If the first instruction of Tail is predicated, we may have to update
// the IT instruction.
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
MachineBasicBlock::iterator MBBI = Tail;
if (CC != ARMCC::AL)
// Expecting at least the t2IT instruction before it.
@@ -101,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
}
unsigned PredReg = 0;
- return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+ return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -130,8 +135,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -158,8 +162,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -570,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
return;
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return;
@@ -586,10 +589,10 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
continue;
MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
if (!(NCC == CC || NCC == OCC) ||
NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->definesRegister(ARM::CPSR))
+ NMI->modifiesRegister(ARM::CPSR, &TRI))
break;
if (++NumInsts == 4)
// Too many in a row!
@@ -607,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+ return getInstrPredicate(MI, PredReg);
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f2637d7fbcab..0911f8a597ce 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB2INSTRUCTIONINFO_H
#define THUMB2INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb2RegisterInfo.h"
namespace llvm {
@@ -28,6 +27,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 355c3bf0352c..29a87d016227 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb2RegisterInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378aeab4e..6b397e869683 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB2REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 89a155c5a7f5..b5a397e61685 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
- unsigned WideOpc; // Wide opcode
- unsigned NarrowOpc1; // Narrow opcode to transform to
- unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
uint8_t Imm1Limit; // Limit of immediate field (bits)
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -146,7 +146,8 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -157,19 +158,21 @@ namespace {
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef);
+ MachineInstr *CPSRDef, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
@@ -186,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there
/// are indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
- if (!Def || !STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool FirstInSelfLoop) {
+ // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
+ if (!STI->avoidCPSRPartialUpdate())
return false;
+ if (!Def)
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ return FirstInSelfLoop;
+
SmallSet<unsigned, 2> Defs;
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = Def->getOperand(i);
@@ -442,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -468,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -502,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -514,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -522,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -535,12 +545,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
break;
@@ -552,13 +562,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -568,9 +578,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
return false;
@@ -579,14 +589,32 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1) {
+ // t2MUL is "special". The tied source operand is second, not first.
+ if (MI->getOpcode() == ARM::t2MUL) {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
+ || !isARMLowRegister(Reg2))
+ return false;
+ if (Reg0 != Reg2) {
+ // If the other operand also isn't the same as the destination, we
+ // can't reduce.
+ if (Reg1 != Reg0)
+ return false;
+ // Try to commute the operands to make it a 2-address instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ } else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
unsigned CommOpIdx1, CommOpIdx2;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
@@ -637,12 +665,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -666,7 +694,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -674,7 +702,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
@@ -727,12 +756,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -772,7 +801,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -817,13 +846,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ bool IsSelfLoop = MBB.isSuccessor(&MBB);
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -832,9 +870,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Ignore "special" cases for now.
if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -842,31 +880,46 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Try to transform to a 16-bit two-address instruction.
if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
// Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ if (BundleMI->killsRegister(ARM::CPSR))
+ LiveCPSR = false;
+ MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
+ if (MO && !MO->isDead())
+ LiveCPSR = true;
+ }
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall())
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
- else if (DefCPSR)
+ IsSelfLoop = false;
+ } else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ IsSelfLoop = false;
+ }
}
return Modified;
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
deleted file mode 100644
index 6ffaf45f4ed1..000000000000
--- a/lib/Target/Alpha/Alpha.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Alpha back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TARGET_ALPHA_H
-#define TARGET_ALPHA_H
-
-#include "MCTargetDesc/AlphaMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- namespace Alpha {
- // These describe LDAx
-
- static const int IMM_LOW = -32768;
- static const int IMM_HIGH = 32767;
- static const int IMM_MULT = 65536;
- }
-
- class AlphaTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
- FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
- FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
- JITCodeEmitter &JCE);
- FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
- FunctionPass *createAlphaBranchSelectionPass();
-
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
deleted file mode 100644
index ae79c2e4b70e..000000000000
--- a/lib/Target/Alpha/Alpha.td
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Get the target-independent interfaces which we are implementing...
-//
-include "llvm/Target/Target.td"
-
-//Alpha is little endian
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
- "Enable CIX extensions">;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaRegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaCallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Schedule Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaSchedule.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "AlphaInstrInfo.td"
-
-def AlphaInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
-// Alpha Processor Definitions
-//===----------------------------------------------------------------------===//
-
-def : Processor<"generic", Alpha21264Itineraries, []>;
-def : Processor<"ev6" , Alpha21264Itineraries, []>;
-def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
-
-//===----------------------------------------------------------------------===//
-// The Alpha Target
-//===----------------------------------------------------------------------===//
-
-
-def Alpha : Target {
- // Pull in Instruction Info:
- let InstructionSet = AlphaInstrInfo;
-}
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
deleted file mode 100644
index 5dce06ac86a5..000000000000
--- a/lib/Target/Alpha/AlphaAsmPrinter.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format Alpha assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- struct AlphaAsmPrinter : public AsmPrinter {
- /// Unique incrementer for label values for referencing Global values.
- ///
-
- explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : AsmPrinter(tm, Streamer) {}
-
- virtual const char *getPassName() const {
- return "Alpha Assembly Printer";
- }
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- static const char *getRegisterName(unsigned RegNo);
-
- void printOp(const MachineOperand &MO, raw_ostream &O);
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- void EmitStartOfAsmFile(Module &M);
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
- };
-} // end of anonymous namespace
-
-#include "AlphaGenAsmWriter.inc"
-
-void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.isReg()) {
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Not physreg??");
- O << getRegisterName(MO.getReg());
- } else if (MO.isImm()) {
- O << MO.getImm();
- assert(MO.getImm() < (1 << 30));
- } else {
- printOp(MO, O);
- }
-}
-
-
-void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
- return;
-
- case MachineOperand::MO_Immediate:
- assert(0 && "printOp() does not handle immediate values");
- return;
-
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
-
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
- << MO.getIndex();
- return;
-
- case MachineOperand::MO_ExternalSymbol:
- O << MO.getSymbolName();
- return;
-
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- return;
-
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
-
- default:
- O << "<unknown operand type: " << MO.getType() << ">";
- return;
- }
-}
-
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyStart() {
- OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
-}
-
-void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
- OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
- OutStreamer.EmitRawText(StringRef("\t.set noat"));
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
- O << "0(";
- printOperand(MI, OpNo, O);
- O << ")";
- return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaAsmPrinter() {
- RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
-}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
deleted file mode 100644
index 376811709536..000000000000
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AlphaBranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Replace Pseudo COND_BRANCH_* with the appropriate real branch instructions.
-// Simplified version of the PPC Branch Selector
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCAsmInfo.h"
-using namespace llvm;
-
-namespace {
- struct AlphaBSel : public MachineFunctionPass {
- static char ID;
- AlphaBSel() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual const char *getPassName() const {
- return "Alpha Branch Selection";
- }
- };
- char AlphaBSel::ID = 0;
-}
-
-/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
-/// Pass
-///
-FunctionPass *llvm::createAlphaBranchSelectionPass() {
- return new AlphaBSel();
-}
-
-bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
-
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = MFI;
-
- for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
- MBBI != EE; ++MBBI) {
- if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
- MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
-
- // condbranch operands:
- // 0. bc opcode
- // 1. reg
- // 2. target MBB
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
- }
- }
- }
-
- return true;
-}
-
diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td
deleted file mode 100644
index bde8819f46e4..000000000000
--- a/lib/Target/Alpha/AlphaCallingConv.td
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for Alpha architecture.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Alpha Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-def RetCC_Alpha : CallingConv<[
- // i64 is returned in register R0
- // R1 is an llvm extension, I don't know what gcc does
- CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
-
- // f32 / f64 are returned in F0/F1
- CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
-]>;
-
-//===----------------------------------------------------------------------===//
-// Alpha Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CC_Alpha : CallingConv<[
- // The first 6 arguments are passed in registers, whether integer or
- // floating-point
- CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
- [F16, F17, F18, F19, F20, F21]>>,
-
- CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
- [R16, R17, R18, R19, R20, R21]>>,
-
- // Stack slots are 8 bytes in size and 8-byte aligned.
- CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
-]>;
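The convention above can be read as: the first six arguments go to R16-R21 or F16-F21, with the integer and floating-point sequences shadowing each other (argument i always consumes slot i in both register files), and everything beyond that goes to 8-byte, 8-byte-aligned stack slots. A rough standalone model of that assignment, not the generated CC_Alpha code (helper names are illustrative):

#include <cstdio>
#include <string>

// Illustrative model of CC_Alpha above: slot i is shared between the
// integer and FP register files, so argument i lands in the i-th register
// of whichever file matches its type; arguments 7 and up go to the stack.
static const char *IntRegs[6] = {"R16", "R17", "R18", "R19", "R20", "R21"};
static const char *FpRegs[6]  = {"F16", "F17", "F18", "F19", "F20", "F21"};

static std::string assignArg(unsigned ArgNo, bool IsFloat) {
  if (ArgNo < 6)
    return IsFloat ? FpRegs[ArgNo] : IntRegs[ArgNo];
  return "stack slot at offset " + std::to_string(8 * (ArgNo - 6));
}

int main() {
  // e.g. f(i64, f64, i64, i64, f64, i64, i64, f32) -- eight arguments.
  bool IsFloat[8] = {false, true, false, false, true, false, false, true};
  for (unsigned i = 0; i != 8; ++i)
    std::printf("arg %u -> %s\n", i, assignArg(i, IsFloat[i]).c_str());
}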
diff --git a/lib/Target/Alpha/AlphaFrameLowering.cpp b/lib/Target/Alpha/AlphaFrameLowering.cpp
deleted file mode 100644
index 690cd1da9c1d..000000000000
--- a/lib/Target/Alpha/AlphaFrameLowering.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaFrameLowering.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/ADT/Twine.h"
-
-using namespace llvm;
-
-static long getUpper16(long l) {
- long y = l / Alpha::IMM_MULT;
- if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
- ++y;
- return y;
-}
-
-static long getLower16(long l) {
- long h = getUpper16(l);
- return l - h * Alpha::IMM_MULT;
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. For Alpha this is true only if the function has
-// variable sized allocas.
-//
-bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasVarSizedObjects();
-}
-
-void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-
- DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
- bool FP = hasFP(MF);
-
-  // Handle GP offset
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
- .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
- .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
-
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
- .addGlobalAddress(MF.getFunction());
-
- // Get the number of bytes to allocate from the FrameInfo
- long NumBytes = MFI->getStackSize();
-
- if (FP)
- NumBytes += 8; //reserve space for the old FP
-
- // Do we need to allocate space on the stack?
- if (NumBytes == 0) return;
-
- unsigned Align = getStackAlignment();
- NumBytes = (NumBytes+Align-1)/Align*Align;
-
- // Update frame info to pretend that this is part of the stack...
- MFI->setStackSize(NumBytes);
-
- // adjust stack pointer: r30 -= numbytes
- NumBytes = -NumBytes;
- if (NumBytes >= Alpha::IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
-
- // Now if we need to, save the old FP and set the new
- if (FP) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
- .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
- // This must be the last instr in the prolog
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
- .addReg(Alpha::R30).addReg(Alpha::R30);
- }
-
-}
-
-void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-
- assert((MBBI->getOpcode() == Alpha::RETDAG ||
- MBBI->getOpcode() == Alpha::RETDAGp)
- && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
-
- bool FP = hasFP(MF);
-
- // Get the number of bytes allocated from the FrameInfo...
- long NumBytes = MFI->getStackSize();
-
- //now if we need to, restore the old FP
- if (FP) {
- //copy the FP into the SP (discards allocas)
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
- .addReg(Alpha::R15);
- //restore the FP
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
- .addImm(0).addReg(Alpha::R15);
- }
-
- if (NumBytes != 0) {
- if (NumBytes <= Alpha::IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
- }
-}
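The getUpper16/getLower16 helpers above split a frame displacement into an LDAH immediate and an LDA immediate so that upper * 65536 + lower reconstructs the original value with the lower half in signed 16-bit range; the prologue additionally rounds the frame size up to the stack alignment. A standalone check of that arithmetic for the non-negative sizes used by the epilogue (constants repeated from the deleted files):

#include <cassert>

static const long IMM_LOW  = -32768;
static const long IMM_HIGH = 32767;
static const long IMM_MULT = 65536;

// Same arithmetic as getUpper16/getLower16 above: split l so that
// getUpper16(l) * IMM_MULT + getLower16(l) == l, with the lower half
// inside the signed 16-bit range an LDA can encode.
static long getUpper16(long l) {
  long y = l / IMM_MULT;
  if (l % IMM_MULT > IMM_HIGH)
    ++y;
  return y;
}
static long getLower16(long l) { return l - getUpper16(l) * IMM_MULT; }

// Round a frame size up to the stack alignment, as emitPrologue does.
static long roundUp(long Bytes, long Align) {
  return (Bytes + Align - 1) / Align * Align;
}

int main() {
  const long Vals[] = {0, 8, 32760, 32768, 65535, 1234567};
  for (long l : Vals) {
    long hi = getUpper16(l), lo = getLower16(l);
    assert(hi * IMM_MULT + lo == l);
    assert(lo >= IMM_LOW && lo <= IMM_HIGH);
  }
  assert(roundUp(72, 16) == 80 && roundUp(80, 16) == 80);
}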
diff --git a/lib/Target/Alpha/AlphaFrameLowering.h b/lib/Target/Alpha/AlphaFrameLowering.h
deleted file mode 100644
index ebd9e1bac190..000000000000
--- a/lib/Target/Alpha/AlphaFrameLowering.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
-
-#include "Alpha.h"
-#include "AlphaSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
- class AlphaSubtarget;
-
-class AlphaFrameLowering : public TargetFrameLowering {
- const AlphaSubtarget &STI;
-  // FIXME: This should end up in MachineFunctionInfo, not here!
- mutable int curgpdist;
-public:
- explicit AlphaFrameLowering(const AlphaSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
- }
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool hasFP(const MachineFunction &MF) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
deleted file mode 100644
index f877c65cd61f..000000000000
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pattern matching instruction selector for Alpha,
-// converting from a legalized DAG to an Alpha DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-namespace {
-
- //===--------------------------------------------------------------------===//
- /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
- /// instructions for SelectionDAG operations.
- class AlphaDAGToDAGISel : public SelectionDAGISel {
- static const int64_t IMM_LOW = -32768;
- static const int64_t IMM_HIGH = 32767;
- static const int64_t IMM_MULT = 65536;
- static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
- static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
-
- static int64_t get_ldah16(int64_t x) {
- int64_t y = x / IMM_MULT;
- if (x % IMM_MULT > IMM_HIGH)
- ++y;
- return y;
- }
-
- static int64_t get_lda16(int64_t x) {
- return x - get_ldah16(x) * IMM_MULT;
- }
-
- /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
- /// instruction (if not, return 0). Note that this code accepts partial
-    /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
- /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
- /// in checking mode. If LHS is null, we assume that the mask has already
- /// been validated before.
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
- uint64_t BitsToCheck = 0;
- unsigned Result = 0;
- for (unsigned i = 0; i != 8; ++i) {
- if (((Constant >> 8*i) & 0xFF) == 0) {
- // nothing to do.
- } else {
- Result |= 1 << i;
- if (((Constant >> 8*i) & 0xFF) == 0xFF) {
- // If the entire byte is set, zapnot the byte.
- } else if (LHS.getNode() == 0) {
-          // Otherwise, if the mask was previously validated, we know it's okay
-          // to zapnot this entire byte even though all the bits aren't set.
-        } else {
-          // Otherwise we don't know that it's okay to zapnot this entire
-          // byte.  Only do this if we can prove that the missing bits are
-          // already zero, so the bytezap doesn't need to really null them.
- BitsToCheck |= ~Constant & (0xFFULL << 8*i);
- }
- }
- }
-
- // If there are missing bits in a byte (for example, X & 0xEF00), check to
-    // see if the missing bits (0x1000) are already known to be zero. If not,
-    // the zap isn't okay to do, as it won't clear all the required bits.
- if (BitsToCheck &&
- !CurDAG->MaskedValueIsZero(LHS,
- APInt(LHS.getValueSizeInBits(),
- BitsToCheck)))
- return 0;
-
- return Result;
- }
-
- static uint64_t get_zapImm(uint64_t x) {
- unsigned build = 0;
- for(int i = 0; i != 8; ++i) {
- if ((x & 0x00FF) == 0x00FF)
- build |= 1 << i;
- else if ((x & 0x00FF) != 0)
- return 0;
- x >>= 8;
- }
- return build;
- }
-
-
- static uint64_t getNearPower2(uint64_t x) {
- if (!x) return 0;
- unsigned at = CountLeadingZeros_64(x);
- uint64_t complow = 1ULL << (63 - at);
- uint64_t comphigh = complow << 1;
- if (x - complow <= comphigh - x)
- return complow;
- else
- return comphigh;
- }
-
- static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
- uint64_t y = getNearPower2(x);
- if (swap)
- return (y - x) == r;
- else
- return (x - y) == r;
- }
-
- public:
- explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
- : SelectionDAGISel(TM)
- {}
-
- /// getI64Imm - Return a target constant with the specified value, of type
- /// i64.
- inline SDValue getI64Imm(int64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
- }
-
- // Select - Convert the specified operand from a target-independent to a
- // target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
-
- virtual const char *getPassName() const {
- return "Alpha DAG->DAG Pattern Instruction Selection";
- }
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- Op0 = Op;
- break;
- }
-
- OutOps.push_back(Op0);
- return false;
- }
-
-// Include the pieces autogenerated from the target description.
-#include "AlphaGenDAGISel.inc"
-
-private:
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const AlphaTargetMachine &getTargetMachine() {
- return static_cast<const AlphaTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const AlphaInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
- SDNode *getGlobalRetAddr();
- void SelectCALL(SDNode *Op);
-
- };
-}
-
-/// getGlobalBaseReg - Output the instructions required to put the
-/// GOT address into a register.
-///
-SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
-}
-
-/// getGlobalRetAddr - Grab the return address.
-///
-SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
- unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
- return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
-}
-
-// Select - Convert the specified operand from a target-independent to a
-// target-specific node if it hasn't already been changed.
-SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
- return NULL; // Already selected.
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case AlphaISD::CALL:
- SelectCALL(N);
- return NULL;
-
- case ISD::FrameIndex: {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
- CurDAG->getTargetFrameIndex(FI, MVT::i32),
- getI64Imm(0));
- }
- case ISD::GLOBAL_OFFSET_TABLE:
- return getGlobalBaseReg();
- case AlphaISD::GlobalRetAddr:
- return getGlobalRetAddr();
-
- case AlphaISD::DivCall: {
- SDValue Chain = CurDAG->getEntryNode();
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
- SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
- Chain.getValue(1));
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
- Chain.getValue(1));
- SDNode *CNode =
- CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
- Chain, Chain.getValue(1));
- Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
- SDValue(CNode, 1));
- return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
- }
-
- case ISD::READCYCLECOUNTER: {
- SDValue Chain = N->getOperand(0);
- return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
- Chain);
- }
-
- case ISD::Constant: {
- uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
-
- if (uval == 0) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Alpha::R31, MVT::i64);
- ReplaceUses(SDValue(N, 0), Result);
- return NULL;
- }
-
- int64_t val = (int64_t)uval;
- int32_t val32 = (int32_t)val;
- if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
- val >= IMM_LOW + IMM_LOW * IMM_MULT)
- break; //(LDAH (LDA))
- if ((uval >> 32) == 0 && //empty upper bits
- val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
- // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
- break; //(zext (LDAH (LDA)))
- //Else use the constant pool
- ConstantInt *C = ConstantInt::get(
- Type::getInt64Ty(*CurDAG->getContext()), uval);
- SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
- SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
- SDValue(getGlobalBaseReg(), 0));
- return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
- CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
- }
- case ISD::TargetConstantFP:
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
- bool isDouble = N->getValueType(0) == MVT::f64;
- EVT T = isDouble ? MVT::f64 : MVT::f32;
- if (CN->getValueAPF().isPosZero()) {
- return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
- T, CurDAG->getRegister(Alpha::F31, T),
- CurDAG->getRegister(Alpha::F31, T));
- } else if (CN->getValueAPF().isNegZero()) {
- return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
- T, CurDAG->getRegister(Alpha::F31, T),
- CurDAG->getRegister(Alpha::F31, T));
- } else {
- report_fatal_error("Unhandled FP constant type");
- }
- break;
- }
-
- case ISD::SETCC:
- if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
-
- unsigned Opc = Alpha::WTF;
- bool rev = false;
- bool inv = false;
- switch(CC) {
- default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
- case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
- Opc = Alpha::CMPTEQ; break;
- case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
- Opc = Alpha::CMPTLT; break;
- case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
- Opc = Alpha::CMPTLE; break;
- case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
- Opc = Alpha::CMPTLT; rev = true; break;
- case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
- Opc = Alpha::CMPTLE; rev = true; break;
- case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
- Opc = Alpha::CMPTEQ; inv = true; break;
- case ISD::SETO:
- Opc = Alpha::CMPTUN; inv = true; break;
- case ISD::SETUO:
- Opc = Alpha::CMPTUN; break;
- };
- SDValue tmp1 = N->getOperand(rev?1:0);
- SDValue tmp2 = N->getOperand(rev?0:1);
- SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
- if (inv)
- cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
- MVT::f64, SDValue(cmp, 0),
- CurDAG->getRegister(Alpha::F31, MVT::f64));
- switch(CC) {
- case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
- case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
- {
- SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
- tmp1, tmp2);
- cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
- SDValue(cmp2, 0), SDValue(cmp, 0));
- break;
- }
- default: break;
- }
-
- SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
- MVT::i64, SDValue(cmp, 0));
- return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
- CurDAG->getRegister(Alpha::R31, MVT::i64),
- SDValue(LD,0));
- }
- break;
-
- case ISD::AND: {
- ConstantSDNode* SC = NULL;
- ConstantSDNode* MC = NULL;
- if (N->getOperand(0).getOpcode() == ISD::SRL &&
- (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
- (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
- uint64_t sval = SC->getZExtValue();
- uint64_t mval = MC->getZExtValue();
-      // If the result is a zap, let the autogenerated patterns handle it.
- if (get_zapImm(N->getOperand(0), mval))
- break;
-      // Given mask X and shift S, we want to see if there is any zap in the
-      // mask if we play around with the bottom S bits.
- uint64_t dontcare = (~0ULL) >> (64 - sval);
- uint64_t mask = mval << sval;
-
- if (get_zapImm(mask | dontcare))
- mask = mask | dontcare;
-
- if (get_zapImm(mask)) {
- SDValue Z =
- SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
- N->getOperand(0).getOperand(0),
- getI64Imm(get_zapImm(mask))), 0);
- return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
- getI64Imm(sval));
- }
- }
- break;
- }
-
- }
-
- return SelectCode(N);
-}
-
-void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
-  // TODO: add flag handling to prevent nondeterministic breakage!
-
- SDValue Chain = N->getOperand(0);
- SDValue Addr = N->getOperand(1);
- SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
- DebugLoc dl = N->getDebugLoc();
-
- if (Addr.getOpcode() == AlphaISD::GPRelLo) {
- SDValue GOT = SDValue(getGlobalBaseReg(), 0);
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
- InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
- MVT::Glue, Addr.getOperand(0),
- Chain, InFlag), 0);
- } else {
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
- InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
- MVT::Glue, Chain, InFlag), 0);
- }
- InFlag = Chain.getValue(1);
-
- ReplaceUses(SDValue(N, 0), Chain);
- ReplaceUses(SDValue(N, 1), InFlag);
-}
-
-
-/// createAlphaISelDag - This pass converts a legalized DAG into an
-/// Alpha-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
- return new AlphaDAGToDAGISel(TM);
-}
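For reference, the zap-mask logic above relies on a simple correspondence: if every byte of a 64-bit AND mask is either all-ones or all-zero, the mask can be summarized as an 8-bit byte mask for ZAPNOT, which keeps exactly the bytes whose bit is set. A small standalone model of the full-byte case (zapImm mirrors the static get_zapImm overload above; applyZapnot is an illustrative stand-in for what the instruction does):

#include <cassert>
#include <cstdint>

// Return the 8-bit ZAPNOT byte mask if every byte of x is 0x00 or 0xFF,
// otherwise 0 (not expressible as a single zap).
static uint64_t zapImm(uint64_t x) {
  unsigned build = 0;
  for (int i = 0; i != 8; ++i) {
    if ((x & 0xFF) == 0xFF)
      build |= 1u << i;
    else if ((x & 0xFF) != 0)
      return 0;
    x >>= 8;
  }
  return build;
}

// What ZAPNOT does with that mask: keep byte i iff bit i of the mask is set.
static uint64_t applyZapnot(uint64_t v, uint64_t mask) {
  uint64_t r = 0;
  for (int i = 0; i != 8; ++i)
    if (mask & (1u << i))
      r |= v & (0xFFull << (8 * i));
  return r;
}

int main() {
  uint64_t m = 0x00FF00000000FFFFull;           // bytes 0, 1 and 6 are all-ones
  uint64_t z = zapImm(m);
  assert(z == 0x43);                            // bits 0, 1 and 6
  assert(applyZapnot(0x1122334455667788ull, z) ==
         (0x1122334455667788ull & m));          // ZAPNOT == AND with the mask
  assert(zapImm(0x00000000FFFF00F0ull) == 0);   // partial byte -> not a zap
}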
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
deleted file mode 100644
index 3057eb8c57fb..000000000000
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ /dev/null
@@ -1,962 +0,0 @@
-//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AlphaISelLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaISelLowering.h"
-#include "AlphaTargetMachine.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-/// AddLiveIn - This helper function adds the specified physical register to the
-/// MachineFunction as a live in value. It also creates a corresponding virtual
-/// register for it.
-static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
- TargetRegisterClass *RC) {
- assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(PReg, VReg);
- return VReg;
-}
-
-AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- // Set up the TargetLowering object.
- //I am having problems with shr n i8 1
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
-
- addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
- addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
- addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
-
- // We want to custom lower some of our intrinsics.
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
-
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
-
- if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
- }
- setOperationAction(ISD::BSWAP , MVT::i64, Expand);
- setOperationAction(ISD::ROTL , MVT::i64, Expand);
- setOperationAction(ISD::ROTR , MVT::i64, Expand);
-
- setOperationAction(ISD::SREM , MVT::i64, Custom);
- setOperationAction(ISD::UREM , MVT::i64, Custom);
- setOperationAction(ISD::SDIV , MVT::i64, Custom);
- setOperationAction(ISD::UDIV , MVT::i64, Custom);
-
- setOperationAction(ISD::ADDC , MVT::i64, Expand);
- setOperationAction(ISD::ADDE , MVT::i64, Expand);
- setOperationAction(ISD::SUBC , MVT::i64, Expand);
- setOperationAction(ISD::SUBE , MVT::i64, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
-
- // We don't support sin/cos/sqrt/pow
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
-
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FPOW , MVT::f64, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::SETCC, MVT::f32, Promote);
-
- setOperationAction(ISD::BITCAST, MVT::f32, Promote);
-
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
-
- // Not implemented yet.
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool and
- // ExternalSymbols nodes into the appropriate instructions to
- // materialize the address.
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
- setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
-
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i32, Custom);
-
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
-
- setStackPointerRegisterToSaveRestore(Alpha::R30);
-
- setJumpBufSize(272);
- setJumpBufAlignment(16);
-
- setMinFunctionAlignment(4);
-
- setInsertFencesForAtomic(true);
-
- computeRegisterProperties();
-}
-
-EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i64;
-}
-
-const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
- case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
- case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
- case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
- case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
- case AlphaISD::RelLit: return "Alpha::RelLit";
- case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
- case AlphaISD::CALL: return "Alpha::CALL";
- case AlphaISD::DivCall: return "Alpha::DivCall";
- case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
- case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
- case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
- }
-}
-
-static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- // FIXME there isn't really any debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
- return Lo;
-}
-
-//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
-//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
-
-//For now, just use variable size stack frame format
-
-//In a standard call, the first six items are passed in registers $16
-//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
-//of argument-to-register correspondence.) The remaining items are
-//collected in a memory argument list that is a naturally aligned
-//array of quadwords. In a standard call, this list, if present, must
-//be passed at 0(SP).
-//7 ... n 0(SP) ... (n-7)*8(SP)
-
-// //#define FP $15
-// //#define RA $26
-// //#define PV $27
-// //#define GP $29
-// //#define SP $30
-
-#include "AlphaGenCallingConv.inc"
-
-SDValue
-AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // Alpha target does not yet support tail call optimization.
- isTailCall = false;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true));
-
- SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
- SmallVector<SDValue, 12> MemOpChains;
- SDValue StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
-    // Arguments that can be passed in a register must be kept in the
-    // RegsToPass vector.
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
-
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
-
- SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset()));
-
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false, 0));
- }
- }
-
-  // Transform all store nodes into a single node, because all store nodes are
-  // independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain and
-  // flag operands which copy the outgoing args into registers. The InFlag is
-  // necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
- InFlag);
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-///
-SDValue
-AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
-
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- VA.getLocVT(), InFlag).getValue(1);
- SDValue RetValue = Chain.getValue(0);
- InFlag = Chain.getValue(2);
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
-
- InVals.push_back(RetValue);
- }
-
- return Chain;
-}
-
-SDValue
-AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
-
- unsigned args_int[] = {
- Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
- unsigned args_float[] = {
- Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
-
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- SDValue argt;
- EVT ObjectVT = Ins[ArgNo].VT;
- SDValue ArgVal;
-
- if (ArgNo < 6) {
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
- case MVT::f64:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
- &Alpha::F8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
- break;
- case MVT::f32:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
- &Alpha::F4RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
- break;
- case MVT::i64:
- args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
- &Alpha::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
- break;
- }
- } else { //more args
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- //from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
- }
- InVals.push_back(ArgVal);
- }
-
-  // If the function takes a variable number of arguments, copy all regs to stack.
- if (isVarArg) {
- FuncInfo->setVarArgsOffset(Ins.size() * 8);
- std::vector<SDValue> LS;
- for (int i = 0; i < 6; ++i) {
- if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
- args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
- SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
- if (i == 0) FuncInfo->setVarArgsBase(FI);
- SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
- false, false, 0));
-
- if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
- args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
- argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
- FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
- SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
- false, false, 0));
- }
-
- //Set up a token factor with all the stack traffic
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
- }
-
- return Chain;
-}
-
-SDValue
-AlphaTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
- DAG.getNode(AlphaISD::GlobalRetAddr,
- DebugLoc(), MVT::i64),
- SDValue());
- switch (Outs.size()) {
- default:
- llvm_unreachable("Do not know how to return this many arguments!");
- case 0:
- break;
- //return SDValue(); // ret void is legal
- case 1: {
- EVT ArgVT = Outs[0].VT;
- unsigned ArgReg;
- if (ArgVT.isInteger())
- ArgReg = Alpha::R0;
- else {
- assert(ArgVT.isFloatingPoint());
- ArgReg = Alpha::F0;
- }
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
- OutVals[0], Copy.getValue(1));
- if (DAG.getMachineFunction().getRegInfo().liveout_empty())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
- break;
- }
- case 2: {
- EVT ArgVT = Outs[0].VT;
- unsigned ArgReg1, ArgReg2;
- if (ArgVT.isInteger()) {
- ArgReg1 = Alpha::R0;
- ArgReg2 = Alpha::R1;
- } else {
- assert(ArgVT.isFloatingPoint());
- ArgReg1 = Alpha::F0;
- ArgReg2 = Alpha::F1;
- }
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
- OutVals[0], Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
- DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
- == DAG.getMachineFunction().getRegInfo().liveout_end())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
- OutVals[1], Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
- DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
- == DAG.getMachineFunction().getRegInfo().liveout_end())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
- break;
- }
- }
- return DAG.getNode(AlphaISD::RET_FLAG, dl,
- MVT::Other, Copy, Copy.getValue(1));
-}
-
-void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
- SDValue &DataPtr,
- SelectionDAG &DAG) const {
- Chain = N->getOperand(0);
- SDValue VAListP = N->getOperand(1);
- const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
- DebugLoc dl = N->getDebugLoc();
-
- SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
- MachinePointerInfo(VAListS),
- false, false, 0);
- SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
- DAG.getConstant(8, MVT::i64));
- SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
- Tmp, MachinePointerInfo(),
- MVT::i32, false, false, 0);
- DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
- if (N->getValueType(0).isFloatingPoint())
- {
- //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
- SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
- DAG.getConstant(8*6, MVT::i64));
- SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
- DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
- DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
- }
-
- SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
- DAG.getConstant(8, MVT::i64));
- Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
- MachinePointerInfo(),
- MVT::i32, false, false, 0);
-}
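Together with the varargs spill in LowerFormalArguments above and the VASTART lowering further down, LowerVAARG implies a two-field va_list: a pointer to the integer register-save area plus a running byte offset (stored as an i32 at the list pointer + 8). Integer arguments are read at base + offset; while the offset is still inside the six-quadword register area, floating-point arguments are read 48 bytes lower, where the FP argument registers were spilled. A rough standalone model of that walk (the struct and field names are hypothetical):

#include <cassert>
#include <cstring>

// Hypothetical model of the va_list implied by the lowering above: a
// pointer to the integer register-save area plus a running byte offset.
struct AlphaVaList {
  char *Base;   // start of the integer register-save area
  int Offset;   // starts at 8 * (number of named arguments)
};

static void *nextArg(AlphaVaList &AP, bool IsFP) {
  char *P = AP.Base + AP.Offset;
  if (IsFP && AP.Offset < 6 * 8)
    P -= 6 * 8;          // FP save area sits 6*8 bytes below the int area
  AP.Offset += 8;        // every argument slot is one 8-byte quadword
  return P;
}

int main() {
  // 48-byte FP save area, 48-byte int save area, then the stack arg area.
  char Frame[48 + 48 + 16];
  std::memset(Frame, 0, sizeof(Frame));
  AlphaVaList AP = {Frame + 48, 8};             // one named argument

  assert(nextArg(AP, false) == Frame + 48 + 8); // first variadic int arg
  assert(nextArg(AP, true) == Frame + 16);      // FP arg while Offset < 48
  AP.Offset = 48;
  assert(nextArg(AP, false) == Frame + 96);     // overflowed to stack args
}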
-
-/// LowerOperation - Provide custom lowering hooks for some operations.
-///
-SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Wasn't expecting to be able to lower this!");
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
-
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: break; // Don't custom lower most intrinsics.
- case Intrinsic::alpha_umulh:
- return DAG.getNode(ISD::MULHU, dl, MVT::i64,
- Op.getOperand(1), Op.getOperand(2));
- }
- }
-
- case ISD::SRL_PARTS: {
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
- SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(64, MVT::i64), ShAmt);
- SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
- DAG.getConstant(0, MVT::i64), ISD::SETLE);
- // if 64 - shAmt <= 0
- SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
- SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(0, MVT::i64), bm);
- SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
- // else
- SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
- SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
- SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
- Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
- // Merge
- SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
- SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
- }
- // case ISD::SRA_PARTS:
-
- // case ISD::SHL_PARTS:
-
-
- case ISD::SINT_TO_FP: {
- assert(Op.getOperand(0).getValueType() == MVT::i64 &&
- "Unhandled SINT_TO_FP type in custom expander!");
- SDValue LD;
- bool isDouble = Op.getValueType() == MVT::f64;
- LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
- SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
- isDouble?MVT::f64:MVT::f32, LD);
- return FP;
- }
- case ISD::FP_TO_SINT: {
- bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
- SDValue src = Op.getOperand(0);
-
- if (!isDouble) //Promote
- src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
-
- src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
-
- return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
- }
- case ISD::ConstantPool: {
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
- // FIXME there isn't really any debug info here
-
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
- return Lo;
- }
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Alpha.");
- case ISD::GlobalAddress: {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
- GSDN->getOffset());
- // FIXME there isn't really any debug info here
-
- // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
- // && !GV->hasLinkOnceLinkage()) {
- if (GV->hasLocalLinkage()) {
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
- return Lo;
- } else
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- }
- case ISD::ExternalSymbol: {
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
- DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
- ->getSymbol(), MVT::i64),
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- }
-
- case ISD::UREM:
- case ISD::SREM:
-    // Expand only in the constant case.
- if (Op.getOperand(1).getOpcode() == ISD::Constant) {
- EVT VT = Op.getNode()->getValueType(0);
- SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
- BuildUDIV(Op.getNode(), DAG, NULL) :
- BuildSDIV(Op.getNode(), DAG, NULL);
- Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
- return Tmp1;
- }
- //fall through
- case ISD::SDIV:
- case ISD::UDIV:
- if (Op.getValueType().isInteger()) {
- if (Op.getOperand(1).getOpcode() == ISD::Constant)
- return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
- : BuildUDIV(Op.getNode(), DAG, NULL);
- const char* opstr = 0;
- switch (Op.getOpcode()) {
- case ISD::UREM: opstr = "__remqu"; break;
- case ISD::SREM: opstr = "__remq"; break;
- case ISD::UDIV: opstr = "__divqu"; break;
- case ISD::SDIV: opstr = "__divq"; break;
- }
- SDValue Tmp1 = Op.getOperand(0),
- Tmp2 = Op.getOperand(1),
- Addr = DAG.getExternalSymbol(opstr, MVT::i64);
- return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
- }
- break;
-
- case ISD::VAARG: {
- SDValue Chain, DataPtr;
- LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
-
- SDValue Result;
- if (Op.getValueType() == MVT::i32)
- Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
- MachinePointerInfo(), MVT::i32, false, false, 0);
- else
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
- MachinePointerInfo(),
- false, false, 0);
- return Result;
- }
- case ISD::VACOPY: {
- SDValue Chain = Op.getOperand(0);
- SDValue DestP = Op.getOperand(1);
- SDValue SrcP = Op.getOperand(2);
- const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
- const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-
- SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
- MachinePointerInfo(SrcS),
- false, false, 0);
- SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
- MachinePointerInfo(DestS),
- false, false, 0);
- SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
- DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
- NP, MachinePointerInfo(), MVT::i32, false, false, 0);
- SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
- DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
- MachinePointerInfo(), MVT::i32,
- false, false, 0);
- }
- case ISD::VASTART: {
- MachineFunction &MF = DAG.getMachineFunction();
- AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
-
- SDValue Chain = Op.getOperand(0);
- SDValue VAListP = Op.getOperand(1);
- const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-
- // vastart stores the address of the VarArgsBase and VarArgsOffset
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
- SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
- MachinePointerInfo(VAListS), false, false, 0);
- SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
- DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(S1, dl,
- DAG.getConstant(FuncInfo->getVarArgsOffset(),
- MVT::i64),
- SA2, MachinePointerInfo(),
- MVT::i32, false, false, 0);
- }
- case ISD::RETURNADDR:
- return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
- //FIXME: implement
- case ISD::FRAMEADDR: break;
- }
-
- return SDValue();
-}
-
-void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const {
- DebugLoc dl = N->getDebugLoc();
- assert(N->getValueType(0) == MVT::i32 &&
- N->getOpcode() == ISD::VAARG &&
- "Unknown node to custom promote!");
-
- SDValue Chain, DataPtr;
- LowerVAARG(N, Chain, DataPtr, DAG);
- SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
- MachinePointerInfo(),
- false, false, 0);
- Results.push_back(Res);
- Results.push_back(SDValue(Res.getNode(), 1));
-}
-
-
-//Inline Asm
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-AlphaTargetLowering::ConstraintType
-AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- default: break;
- case 'f':
- case 'r':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(Constraint);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-AlphaTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- case 'f':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
-{
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- return std::make_pair(0U, Alpha::GPRCRegisterClass);
- case 'f':
- return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
- std::make_pair(0U, Alpha::F4RCRegisterClass);
- }
- }
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-MachineBasicBlock *
-AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- assert((MI->getOpcode() == Alpha::CAS32 ||
- MI->getOpcode() == Alpha::CAS64 ||
- MI->getOpcode() == Alpha::LAS32 ||
- MI->getOpcode() == Alpha::LAS64 ||
- MI->getOpcode() == Alpha::SWAP32 ||
- MI->getOpcode() == Alpha::SWAP64) &&
- "Unexpected instr type to insert");
-
- bool is32 = MI->getOpcode() == Alpha::CAS32 ||
- MI->getOpcode() == Alpha::LAS32 ||
- MI->getOpcode() == Alpha::SWAP32;
-
-  //Load-locked/store-conditional sequences for atomic ops all take the same form:
- //start:
- //ll
- //do stuff (maybe branch to exit)
- //sc
-  //test sc and maybe branch to start
- //exit:
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- DebugLoc dl = MI->getDebugLoc();
- MachineFunction::iterator It = BB;
- ++It;
-
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-
- sinkMBB->splice(sinkMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- thisMBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- F->insert(It, llscMBB);
- F->insert(It, sinkMBB);
-
- BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
-
- unsigned reg_res = MI->getOperand(0).getReg(),
- reg_ptr = MI->getOperand(1).getReg(),
- reg_v2 = MI->getOperand(2).getReg(),
- reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
-
- BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
- reg_res).addImm(0).addReg(reg_ptr);
- switch (MI->getOpcode()) {
- case Alpha::CAS32:
- case Alpha::CAS64: {
- unsigned reg_cmp
- = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
- .addReg(reg_v2).addReg(reg_res);
- BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
- .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
- BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
- .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
- break;
- }
- case Alpha::LAS32:
- case Alpha::LAS64: {
- BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
- .addReg(reg_res).addReg(reg_v2);
- break;
- }
- case Alpha::SWAP32:
- case Alpha::SWAP64: {
- BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
- .addReg(reg_v2).addReg(reg_v2);
- break;
- }
- }
- BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
- .addReg(reg_store).addImm(0).addReg(reg_ptr);
- BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
- .addImm(0).addReg(reg_store).addMBB(llscMBB);
- BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
-
- thisMBB->addSuccessor(llscMBB);
- llscMBB->addSuccessor(llscMBB);
- llscMBB->addSuccessor(sinkMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
-
- return sinkMBB;
-}
-
-bool
-AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The Alpha target isn't yet aware of offsets.
- return false;
-}
-
-bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (VT != MVT::f32 && VT != MVT::f64)
- return false;
- // +0.0 F31
- // +0.0f F31
- // -0.0 -F31
- // -0.0f -F31
- return Imm.isZero() || Imm.isNegZero();
-}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
deleted file mode 100644
index 80f8efaea5d2..000000000000
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ /dev/null
@@ -1,142 +0,0 @@
-//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Alpha uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
-#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
-
-#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "Alpha.h"
-
-namespace llvm {
-
- namespace AlphaISD {
- enum NodeType {
-      // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-      //These correspond to the identically named instructions
- CVTQT_, CVTQS_, CVTTQ_,
-
- /// GPRelHi/GPRelLo - These represent the high and low 16-bit
- /// parts of a global address respectively.
- GPRelHi, GPRelLo,
-
-    /// RelLit - Literal Relocation of a Global
- RelLit,
-
- /// GlobalRetAddr - used to restore the return address
- GlobalRetAddr,
-
- /// CALL - Normal call.
- CALL,
-
- /// DIVCALL - used for special library calls for div and rem
- DivCall,
-
- /// return flag operand
- RET_FLAG,
-
- /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction.
- /// *PRC is the input register to compare to zero,
- /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
- /// DESTBB is the destination block to branch to, and INFLAG is
- /// an optional input flag argument.
- COND_BRANCH_I, COND_BRANCH_F
-
- };
- }
-
- class AlphaTargetLowering : public TargetLowering {
- public:
- explicit AlphaTargetLowering(TargetMachine &TM);
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
-
- /// getSetCCResultType - Get the SETCC result ValueType
- virtual EVT getSetCCResultType(EVT VT) const;
-
- /// LowerOperation - Provide custom lowering hooks for some operations.
- ///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// ReplaceNodeResults - Replace the results of node with an illegal result
- /// type with new values built out of custom code.
- ///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
-
- // Friendly names for dumps
- const char *getTargetNodeName(unsigned Opcode) const;
-
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
- /// isFPImmLegal - Returns true if the target can instruction select the
- /// specified FP immediate natively. If false, the legalizer will
- /// materialize the FP immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-
- private:
- // Helpers for custom lowering.
- void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
- SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
- };
-}
-
-#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
deleted file mode 100644
index 6f4ebf279643..000000000000
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ /dev/null
@@ -1,268 +0,0 @@
-//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-//3.3:
-//Memory
-//Branch
-//Operate
-//Floating-point
-//PALcode
-
-def u8imm : Operand<i64>;
-def s14imm : Operand<i64>;
-def s16imm : Operand<i64>;
-def s21imm : Operand<i64>;
-def s64imm : Operand<i64>;
-def u64imm : Operand<i64>;
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-// Alpha instruction baseline
-class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
- field bits<32> Inst;
- let Namespace = "Alpha";
- let AsmString = asmstr;
- let Inst{31-26} = op;
- let Itinerary = itin;
-}
-
-
-//3.3.1
-class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let canFoldAsLoad = load;
- let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
-
- bits<5> Ra;
- bits<16> disp;
- bits<5> Rb;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-0} = disp;
-}
-class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- bits<5> Ra;
-
- let OutOperandList = (outs GPRC:$RA);
- let InOperandList = (ins);
- let Inst{25-21} = Ra;
- let Inst{20-16} = 0;
- let Inst{15-0} = fc;
-}
-class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = (ins);
- let Inst{25-21} = 0;
- let Inst{20-16} = 0;
- let Inst{15-0} = fc;
-}
-
-class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- bits<5> Ra;
- bits<5> Rb;
- bits<14> disp;
-
- let OutOperandList = (outs);
- let InOperandList = OL;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-14} = TB;
- let Inst{13-0} = disp;
-}
-class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern=pattern;
- bits<5> Ra;
- bits<5> Rb;
- bits<14> disp;
-
- let OutOperandList = (outs);
- let InOperandList = OL;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-14} = TB;
- let Inst{13-0} = disp;
-}
-
-//3.3.2
-def target : Operand<OtherVT> {}
-
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = OL;
- bits<64> Opc; //dummy
- bits<5> Ra;
- bits<21> disp;
-
- let Inst{25-21} = Ra;
- let Inst{20-0} = disp;
-}
-}
-
-let isBranch = 1, isTerminator = 1 in
-class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs);
- let InOperandList = (ins target:$DISP);
- bits<5> Ra;
- bits<21> disp;
-
- let Inst{25-21} = Ra;
- let Inst{20-0} = disp;
-}
-
-//3.3.3
-class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RA, GPRC:$RB);
-
- bits<5> Rc;
- bits<5> Ra;
- bits<5> Rb;
- bits<7> Function = fun;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RB);
-
- bits<5> Rc;
- bits<5> Rb;
- bits<7> Function = fun;
-
- let Inst{25-21} = 31;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RDEST);
- let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
- let Constraints = "$RFALSE = $RDEST";
- let DisableEncoding = "$RFALSE";
-
- bits<5> Rc;
- bits<5> Ra;
- bits<5> Rb;
- bits<7> Function = fun;
-
-// let Constraints = "$RFALSE = $RDEST";
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-
-class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RA, u8imm:$L);
-
- bits<5> Rc;
- bits<5> Ra;
- bits<8> LIT;
- bits<7> Function = fun;
-
- let Inst{25-21} = Ra;
- let Inst{20-13} = LIT;
- let Inst{12} = 1;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RDEST);
- let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
- let Constraints = "$RFALSE = $RDEST";
- let DisableEncoding = "$RFALSE";
-
- bits<5> Rc;
- bits<5> Ra;
- bits<8> LIT;
- bits<7> Function = fun;
-
-// let Constraints = "$RFALSE = $RDEST";
- let Inst{25-21} = Ra;
- let Inst{20-13} = LIT;
- let Inst{12} = 1;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-//3.3.4
-class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
-
- bits<5> Fc;
- bits<5> Fa;
- bits<5> Fb;
- bits<11> Function = fun;
-
- let Inst{25-21} = Fa;
- let Inst{20-16} = Fb;
- let Inst{15-5} = Function;
- let Inst{4-0} = Fc;
-}
-
-//3.3.5
-class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = OL;
- bits<26> Function;
-
- let Inst{25-0} = Function;
-}
-
-
-// Pseudo instructions.
-class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<0, nm, itin> {
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let Pattern = pattern;
-
-}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
deleted file mode 100644
index 8df2ed75f625..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AlphaGenInstrInfo.inc"
-using namespace llvm;
-
-AlphaInstrInfo::AlphaInstrInfo()
- : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
- RI(*this) {
-}
-
-
-unsigned
-AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- case Alpha::LDL:
- case Alpha::LDQ:
- case Alpha::LDBU:
- case Alpha::LDWU:
- case Alpha::LDS:
- case Alpha::LDT:
- if (MI->getOperand(1).isFI()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned
-AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- case Alpha::STL:
- case Alpha::STQ:
- case Alpha::STB:
- case Alpha::STW:
- case Alpha::STS:
- case Alpha::STT:
- if (MI->getOperand(1).isFI()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-static bool isAlphaIntCondCode(unsigned Opcode) {
- switch (Opcode) {
- case Alpha::BEQ:
- case Alpha::BNE:
- case Alpha::BGE:
- case Alpha::BGT:
- case Alpha::BLE:
- case Alpha::BLT:
- case Alpha::BLBC:
- case Alpha::BLBS:
- return true;
- default:
- return false;
- }
-}
-
-unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "Alpha branch conditions have two components!");
-
- // One-way branch.
- if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
- else // Conditional branch
- if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- else
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- return 1;
- }
-
- // Two-way Conditional Branch.
- if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- else
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
- return 2;
-}
-
-void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- llvm_unreachable("Attempt to copy register that is not GPR or FPR");
- }
-}
-
-void
-AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
- // << FrameIdx << "\n";
- //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- if (RC == Alpha::F4RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::F8RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STT))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::GPRCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STQ))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else
- llvm_unreachable("Unhandled register class");
-}
-
-void
-AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
- // << FrameIdx << "\n";
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- if (RC == Alpha::F4RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::F8RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::GPRCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else
- llvm_unreachable("Unhandled register class");
-}
-
-static unsigned AlphaRevCondCode(unsigned Opcode) {
- switch (Opcode) {
- case Alpha::BEQ: return Alpha::BNE;
- case Alpha::BNE: return Alpha::BEQ;
- case Alpha::BGE: return Alpha::BLT;
- case Alpha::BGT: return Alpha::BLE;
- case Alpha::BLE: return Alpha::BGT;
- case Alpha::BLT: return Alpha::BGE;
- case Alpha::BLBC: return Alpha::BLBS;
- case Alpha::BLBS: return Alpha::BLBC;
- case Alpha::FBEQ: return Alpha::FBNE;
- case Alpha::FBNE: return Alpha::FBEQ;
- case Alpha::FBGE: return Alpha::FBLT;
- case Alpha::FBGT: return Alpha::FBLE;
- case Alpha::FBLE: return Alpha::FBGT;
- case Alpha::FBLT: return Alpha::FBGE;
- default:
- llvm_unreachable("Unknown opcode");
- }
- return 0; // Not reached
-}
-
-// Branch analysis.
-bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (LastInst->getOpcode() == Alpha::BR) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
- LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(2).getMBB();
- Cond.push_back(LastInst->getOperand(0));
- Cond.push_back(LastInst->getOperand(1));
- return false;
- }
- // Otherwise, don't know what this is.
- return true;
- }
-
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
- if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
- SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
- LastInst->getOpcode() == Alpha::BR) {
- TBB = SecondLastInst->getOperand(2).getMBB();
- Cond.push_back(SecondLastInst->getOperand(0));
- Cond.push_back(SecondLastInst->getOperand(1));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two Alpha::BRs, handle it. The second one is not
- // executed, so remove it.
- if (SecondLastInst->getOpcode() == Alpha::BR &&
- LastInst->getOpcode() == Alpha::BR) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (I->getOpcode() != Alpha::BR &&
- I->getOpcode() != Alpha::COND_BRANCH_I &&
- I->getOpcode() != Alpha::COND_BRANCH_F)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (I->getOpcode() != Alpha::COND_BRANCH_I &&
- I->getOpcode() != Alpha::COND_BRANCH_F)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
-}
-
-void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- DebugLoc DL;
- BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
-}
-
-bool AlphaInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
- Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
- return false;
-}
-
-/// getGlobalBaseReg - Return a virtual register initialized with the
-/// global base register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
- unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
- return GlobalBaseReg;
-
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalBaseReg).addReg(Alpha::R29);
- RegInfo.addLiveIn(Alpha::R29);
-
- AlphaFI->setGlobalBaseReg(GlobalBaseReg);
- return GlobalBaseReg;
-}
-
-/// getGlobalRetAddr - Return a virtual register initialized with the
-/// global return address register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
- AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
- unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr();
- if (GlobalRetAddr != 0)
- return GlobalRetAddr;
-
- // Insert the set of GlobalRetAddr into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalRetAddr).addReg(Alpha::R26);
- RegInfo.addLiveIn(Alpha::R26);
-
- AlphaFI->setGlobalRetAddr(GlobalRetAddr);
- return GlobalRetAddr;
-}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
deleted file mode 100644
index 337a85cdf22d..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAINSTRUCTIONINFO_H
-#define ALPHAINSTRUCTIONINFO_H
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "AlphaRegisterInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "AlphaGenInstrInfo.inc"
-
-namespace llvm {
-
-class AlphaInstrInfo : public AlphaGenInstrInfo {
- const AlphaRegisterInfo RI;
-public:
- AlphaInstrInfo();
-
-  /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo.  As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- /// getGlobalBaseReg - Return a virtual register initialized with the
-  /// global base register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalBaseReg(MachineFunction *MF) const;
-
- /// getGlobalRetAddr - Return a virtual register initialized with the
-  /// global return address register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalRetAddr(MachineFunction *MF) const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
deleted file mode 100644
index c8c9377c3d8d..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ /dev/null
@@ -1,1159 +0,0 @@
-//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-include "AlphaInstrFormats.td"
-
-//********************
-//Custom DAG Nodes
-//********************
-
-def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
- SDTCisFP<1>, SDTCisFP<0>
-]>;
-def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
-def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
-def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
-def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
-def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
-def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
-
-def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-// These are target-independent nodes, but have target-specific formats.
-def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
-def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
- SDTCisVT<1, i64> ]>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-//********************
-//Patterns for matching
-//********************
-def invX : SDNodeXForm<imm, [{ //invert
- return getI64Imm(~N->getZExtValue());
-}]>;
-def negX : SDNodeXForm<imm, [{ //negate
- return getI64Imm(~N->getZExtValue() + 1);
-}]>;
-def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
- return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
-}]>;
-def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
- return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
-}]>;
-def LL16 : SDNodeXForm<imm, [{ //lda part of constant
- return getI64Imm(get_lda16(N->getZExtValue()));
-}]>;
-def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
- return getI64Imm(get_ldah16(N->getZExtValue()));
-}]>;
-def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
- ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
- return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
-}]>;
-def nearP2X : SDNodeXForm<imm, [{
- return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
-}]>;
-def nearP2RemX : SDNodeXForm<imm, [{
- uint64_t x =
- abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
- return getI64Imm(Log2_64(x));
-}]>;
-
-def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
- return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
-}]>;
-def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
- return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
-}], invX>;
-def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
- return ((uint64_t)~N->getZExtValue() + 1) ==
- (uint8_t)((uint64_t)~N->getZExtValue() + 1);
-}], negX>;
-def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
- return ((int64_t)N->getZExtValue() << 48) >> 48 ==
- (int64_t)N->getZExtValue();
-}]>;
-def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
- return ((int64_t)N->getZExtValue() << 48) >> 48 ==
- ((int64_t)N->getZExtValue() << 32) >> 32;
-}], SExt16>;
-
-def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
- ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!RHS) return 0;
- uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
- return build != 0;
-}]>;
-
-def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
- (void)N; // silence warning.
- return true;
-}]>;
-
-def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
-def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
-def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
-def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
-def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
-def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
-def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
-def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
-def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
-def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
-
-def immRemP2n : PatLeaf<(imm), [{
- return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
- N->getZExtValue());
-}]>;
-def immRemP2 : PatLeaf<(imm), [{
- return isPowerOf2_64(N->getZExtValue() -
- getNearPower2((uint64_t)N->getZExtValue()));
-}]>;
-def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
- int64_t d = abs64((int64_t)N->getZExtValue() -
- (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
- if (isPowerOf2_64(d)) return false;
- switch (d) {
- case 1: case 3: case 5: return false;
- default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
- };
-}]>;
-
-def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
-def add4 : PatFrag<(ops node:$op1, node:$op2),
- (add (shl node:$op1, 2), node:$op2)>;
-def sub4 : PatFrag<(ops node:$op1, node:$op2),
- (sub (shl node:$op1, 2), node:$op2)>;
-def add8 : PatFrag<(ops node:$op1, node:$op2),
- (add (shl node:$op1, 3), node:$op2)>;
-def sub8 : PatFrag<(ops node:$op1, node:$op2),
- (sub (shl node:$op1, 3), node:$op2)>;
-class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
-class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
-
-//Pseudo ops for selection
-
-def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
-
-let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
-def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
- "; ADJUP $amt",
- [(callseq_start timm:$amt)], s_pseudo>;
-def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
- "; ADJDOWN $amt1",
- [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
-}
-
-def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
-def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
-def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
- "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
-
-
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
-def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
-
-def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-
-def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-}
-
-//***********************
-//Real instructions
-//***********************
-
-//Operation Form:
-
-//conditional moves, int
-
-multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
-def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
- [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
-def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
- [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
-}
-
-defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
-defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
-defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
-defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
-defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
-defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
-defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
-defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
-
-//General pattern for cmov
-def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
- (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
-def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
- (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
-
-//Invert sense when we can for constants:
-def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-
-multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
- string asmstr, PatFrag OpNode, InstrItinClass itin> {
- def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
- [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
- def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
- [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
- def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
- def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
-}
-
-defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
-defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
-defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
-defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
-defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
-defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
-defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
-//Constant cases, since legalize turns (sub x, int) into (add x, inv(int) + 1)
-def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
-
-multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
-def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
-def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
-}
-multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
-def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
-def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
-}
-
-defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
-defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
-defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
-defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
-defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
-defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
-
-defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
-defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
-defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
-defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
-
-def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
- [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
-def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
- [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
-def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
- [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
-def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
-def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
-def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
-def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
- [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
-def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
- [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
-
-//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
-//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
-//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
-//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
-//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
-//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
-//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
-//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
-//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
-//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
-//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
-
-//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
-//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
-//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
-//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
-//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
-//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
-//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
-//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
-//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
-//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
-//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
-//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
-//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
-//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
-
-//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
-//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
-//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
-//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
-//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
-//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
-//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
-//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
-//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
-//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
-//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
-//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
-//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
-//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
-
-def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
-
-// Define the pattern that produces ZAPNOTi.
-def : Pat<(zappat:$imm GPRC:$RA),
- (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
-
-
-//Comparison, int
-//So this is a waste of what this instruction can do, but it still saves something
-def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
- [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
-def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
- [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
-def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
- [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
- [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
- [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
- [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
- [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
- [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
- [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
- [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
- [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
- [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
-
-//Patterns for unsupported int comparisons
-def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
-def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
-
-def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
-def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
-def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
-def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
-def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
-def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
-
-def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
-def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
-
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
- def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
- def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
-}
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
-def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0",
- [(brind GPRC:$RS)], s_jsr>; //Jump
-
-let isCall = 1, Ra = 26,
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
- def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
-}
-let isCall = 1, Ra = 26, Rb = 27, disp = 0,
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
- def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
-}
-
-let isCall = 1, Ra = 23, Rb = 27, disp = 0,
- Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
- def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
-
-
-def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
-
-
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
- [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
- [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
- [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
- [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-}
-
-
-let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
- [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
- [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
- [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
- [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
- [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-}
-
-//Load address
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
- [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
-def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
-def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
- [], s_lda>; //Load address high
-def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
- [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
-}
-
-let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
- [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
-def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
- [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
-}
-let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
- [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
-def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
- [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
-}
-let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
- [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
-def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
- [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
-}
-let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
- [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
-def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
- [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
-}
-
-
-//constpool rels
-def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDQr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDLr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDBUr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDWUr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
- (LDAr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
- (LDAHr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDSr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDTr tconstpool:$DISP, GPRC:$RB)>;
-
-//jumptable rels
-def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
- (LDAHr tjumptable:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
- (LDAr tjumptable:$DISP, GPRC:$RB)>;
-
-
-//misc ext patterns
-def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
- (LDBU immSExt16:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
- (LDWU immSExt16:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
- (LDL immSExt16:$DISP, GPRC:$RB)>;
-
-//0 disp patterns
-def : Pat<(i64 (load GPRC:$addr)),
- (LDQ 0, GPRC:$addr)>;
-def : Pat<(f64 (load GPRC:$addr)),
- (LDT 0, GPRC:$addr)>;
-def : Pat<(f32 (load GPRC:$addr)),
- (LDS 0, GPRC:$addr)>;
-def : Pat<(i64 (sextloadi32 GPRC:$addr)),
- (LDL 0, GPRC:$addr)>;
-def : Pat<(i64 (zextloadi16 GPRC:$addr)),
- (LDWU 0, GPRC:$addr)>;
-def : Pat<(i64 (zextloadi8 GPRC:$addr)),
- (LDBU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi8 GPRC:$addr)),
- (LDBU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi16 GPRC:$addr)),
- (LDWU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi32 GPRC:$addr)),
- (LDL 0, GPRC:$addr)>;
-
-def : Pat<(store GPRC:$DATA, GPRC:$addr),
- (STQ GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(store F8RC:$DATA, GPRC:$addr),
- (STT F8RC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(store F4RC:$DATA, GPRC:$addr),
- (STS F4RC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
- (STL GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
- (STW GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
- (STB GPRC:$DATA, 0, GPRC:$addr)>;
-
-
-//Load address, relocated gpdist form
-let OutOperandList = (outs GPRC:$RA),
- InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
- mayLoad = 1 in {
-def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
-def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address high
-}
-
-//Load quad, relocated literal form
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in
-def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
- [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
-def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
- (LDQl texternalsym:$ext, GPRC:$RB)>;
-
-let OutOperandList = (outs GPRC:$RR),
- InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
- Constraints = "$RA = $RR",
- DisableEncoding = "$RR" in {
-def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
-def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
-}
-let OutOperandList = (outs GPRC:$RA),
- InOperandList = (ins s64imm:$DISP, GPRC:$RB),
- mayLoad = 1 in {
-def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
-def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
-}
-
-def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
-def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
-def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
-
-def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
- (WMB)>;
-def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
- (MB)>;
-
-def : Pat<(atomic_fence (imm), (imm)), (MB)>;
-
-//Basic Floating point ops
-
-//Floats
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
-def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
- [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
-def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
-def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
-def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
-def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
-
-def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
-def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
-}
-
-//Doubles
-
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
- [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
-
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
-def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
-def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
-def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
-def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
-
-def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
-def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
-
-def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
-def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
-def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
-def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
-}
-
-//More CPYS forms:
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
-def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
-def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
-}
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
-def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
-def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
-}
-
-//conditional moves, floats
-let OutOperandList = (outs F4RC:$RDEST),
- InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
- Constraints = "$RTRUE = $RDEST" in {
-def FCMOVEQS : FPForm<0x17, 0x02A,
- "fcmoveq $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if = zero
-def FCMOVGES : FPForm<0x17, 0x02D,
- "fcmovge $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if >= zero
-def FCMOVGTS : FPForm<0x17, 0x02F,
- "fcmovgt $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if > zero
-def FCMOVLES : FPForm<0x17, 0x02E,
- "fcmovle $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if <= zero
-def FCMOVLTS : FPForm<0x17, 0x02C,
- "fcmovlt $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; // FCMOVE if < zero
-def FCMOVNES : FPForm<0x17, 0x02B,
- "fcmovne $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if != zero
-}
-//conditional moves, doubles
-let OutOperandList = (outs F8RC:$RDEST),
- InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
- Constraints = "$RTRUE = $RDEST" in {
-def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-}
-
-//misc FP selects
-//Select double
-
-def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-
-//Select single
-def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-
-
-
-let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in
-def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
- [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
-let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in
-def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
- [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
-def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
- [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
-def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
- [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
-
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
- [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
- [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
- [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
-def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
- [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
- [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
-
-def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
- (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
-def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
- (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
-
-/////////////////////////////////////////////////////////
-//Branching
-/////////////////////////////////////////////////////////
-class br_icc<bits<6> opc, string asmstr>
- : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst),
- !strconcat(asmstr, " $R,$dst"), s_icbr>;
-class br_fcc<bits<6> opc, string asmstr>
- : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst),
- !strconcat(asmstr, " $R,$dst"), s_fbr>;
-
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-let Ra = 31, isBarrier = 1 in
-def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
-
-def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
- "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
- s_icbr>;
-def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst),
- "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
- s_fbr>;
-//Branches, int
-def BEQ : br_icc<0x39, "beq">;
-def BGE : br_icc<0x3E, "bge">;
-def BGT : br_icc<0x3F, "bgt">;
-def BLBC : br_icc<0x38, "blbc">;
-def BLBS : br_icc<0x3C, "blbs">;
-def BLE : br_icc<0x3B, "ble">;
-def BLT : br_icc<0x3A, "blt">;
-def BNE : br_icc<0x3D, "bne">;
-
-//Branches, float
-def FBEQ : br_fcc<0x31, "fbeq">;
-def FBGE : br_fcc<0x36, "fbge">;
-def FBGT : br_fcc<0x37, "fbgt">;
-def FBLE : br_fcc<0x33, "fble">;
-def FBLT : br_fcc<0x32, "fblt">;
-def FBNE : br_fcc<0x35, "fbne">;
-}
-
-//Map a branch-condition immediate onto the corresponding branch opcode
-def immBRCond : SDNodeXForm<imm, [{
- switch((uint64_t)N->getZExtValue()) {
- default: assert(0 && "Unknown branch type");
- case 0: return getI64Imm(Alpha::BEQ);
- case 1: return getI64Imm(Alpha::BNE);
- case 2: return getI64Imm(Alpha::BGE);
- case 3: return getI64Imm(Alpha::BGT);
- case 4: return getI64Imm(Alpha::BLE);
- case 5: return getI64Imm(Alpha::BLT);
- case 6: return getI64Imm(Alpha::BLBS);
- case 7: return getI64Imm(Alpha::BLBC);
- case 20: return getI64Imm(Alpha::FBEQ);
- case 21: return getI64Imm(Alpha::FBNE);
- case 22: return getI64Imm(Alpha::FBGE);
- case 23: return getI64Imm(Alpha::FBGT);
- case 24: return getI64Imm(Alpha::FBLE);
- case 25: return getI64Imm(Alpha::FBLT);
- }
-}]>;
-
-//Int cond patterns
-def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
- (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
-
-def : Pat<(brcond GPRC:$RA, bb:$DISP),
- (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
-
-//FP cond patterns
-def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
-
-
-def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-
-def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-
-def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-
-def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
-
-//End Branches
-
-//S_floating : IEEE Single
-//T_floating : IEEE Double
-
-//Unused instructions
-//Mnemonic Format Opcode Description
-//CALL_PAL Pcd 00 Trap to PALcode
-//ECB Mfc 18.E800 Evict cache block
-//EXCB Mfc 18.0400 Exception barrier
-//FETCH Mfc 18.8000 Prefetch data
-//FETCH_M Mfc 18.A000 Prefetch data, modify intent
-//LDQ_U Mem 0B Load unaligned quadword
-//MB Mfc 18.4000 Memory barrier
-//STQ_U Mem 0F Store unaligned quadword
-//TRAPB Mfc 18.0000 Trap barrier
-//WH64 Mfc 18.F800 Write hint - 64 bytes
-//WMB Mfc 18.4400 Write memory barrier
-//MF_FPCR F-P 17.025 Move from FPCR
-//MT_FPCR F-P 17.024 Move to FPCR
-//These are in the Multimedia extensions, so let's not use them yet
-//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
-//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
-//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
-//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
-//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
-//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
-//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
-//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
-//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
-//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
-//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
-//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
-//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
-//CVTLQ F-P 17.010 Convert longword to quadword
-//CVTQL F-P 17.030 Convert quadword to longword
-
-
-//Constant handling
-
-def immConst2Part : PatLeaf<(imm), [{
- //true if imm fits in an LDAH/LDA pair
- int64_t val = (int64_t)N->getZExtValue();
- return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
-}]>;
-def immConst2PartInt : PatLeaf<(imm), [{
- //true if imm fits in an LDAH/LDA pair with zero extension
- uint64_t uval = N->getZExtValue();
- int32_t val32 = (int32_t)uval;
- return ((uval >> 32) == 0 && //empty upper bits
- val32 <= IMM_FULLHIGH);
-// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
-}], SExt32>;
-
-def : Pat<(i64 immConst2Part:$imm),
- (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
-
-def : Pat<(i64 immSExt16:$imm),
- (LDA immSExt16:$imm, R31)>;
-
-def : Pat<(i64 immSExt16int:$imm),
- (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
-def : Pat<(i64 immConst2PartInt:$imm),
- (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
- (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
-
-
-//TODO: I want to just define these like this!
-//def : Pat<(i64 0),
-// (R31)>;
-//def : Pat<(f64 0.0),
-// (F31)>;
-//def : Pat<(f64 -0.0),
-// (CPYSNT F31, F31)>;
-//def : Pat<(f32 0.0),
-// (F31)>;
-//def : Pat<(f32 -0.0),
-// (CPYSNS F31, F31)>;
-
-//Misc Patterns:
-
-def : Pat<(sext_inreg GPRC:$RB, i32),
- (ADDLi GPRC:$RB, 0)>;
-
-def : Pat<(fabs F8RC:$RB),
- (CPYST F31, F8RC:$RB)>;
-def : Pat<(fabs F4RC:$RB),
- (CPYSS F31, F4RC:$RB)>;
-def : Pat<(fneg F8RC:$RB),
- (CPYSNT F8RC:$RB, F8RC:$RB)>;
-def : Pat<(fneg F4RC:$RB),
- (CPYSNS F4RC:$RB, F4RC:$RB)>;
-
-def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
- (CPYSNS F4RC:$B, F4RC:$A)>;
-def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
- (CPYSNT F8RC:$B, F8RC:$A)>;
-def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
- (CPYSNSt F8RC:$B, F4RC:$A)>;
-def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
- (CPYSNTs F4RC:$B, F8RC:$A)>;
-
-//Yes, signed multiply high is ugly
-def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
- (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
- (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
-
-//Strength-reduction patterns for multiplication by small constants:
-let AddedComplexity = 1 in {
-def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
-
-//slight tree expansion if we are multiplying by a constant near a power of 2
-//n is just above a power of 2
-def : Pat<(mul GPRC:$RA, immRem1:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, immRem2:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRem3:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRem4:$imm),
- (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
-def : Pat<(mul GPRC:$RA, immRem5:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRemP2:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
-
-//n is below a power of 2
-//FIXME: figure out why something is truncating the imm to 32bits
-// this will fix 2007-11-27-mulneg3
-//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
-//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
-//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
-} //Added complexity
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
deleted file mode 100644
index 85fbfd1affe2..000000000000
--- a/lib/Target/Alpha/AlphaLLRP.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Here we check for potential replay traps introduced by the spiller
-// We also align some branch targets if we can do so for free.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-nops"
-#include "Alpha.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-STATISTIC(nopintro, "Number of nops inserted");
-STATISTIC(nopalign, "Number of nops inserted for alignment");
-
-namespace {
- cl::opt<bool>
- AlignAll("alpha-align-all", cl::Hidden,
- cl::desc("Align all blocks"));
-
- struct AlphaLLRPPass : public MachineFunctionPass {
- /// Target machine description which we query for reg. names, data
- /// layout, etc.
- ///
- AlphaTargetMachine &TM;
-
- static char ID;
- AlphaLLRPPass(AlphaTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) { }
-
- virtual const char *getPassName() const {
- return "Alpha NOP inserter";
- }
-
- bool runOnMachineFunction(MachineFunction &F) {
- const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
- bool Changed = false;
- MachineInstr* prev[3] = {0,0,0};
- DebugLoc dl;
- unsigned count = 0;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI) {
- MachineBasicBlock& MBB = *FI;
- bool ub = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- if (count%4 == 0)
- prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
- ++count;
- MachineInstr *MI = I++;
- switch (MI->getOpcode()) {
- case Alpha::LDQ: case Alpha::LDL:
- case Alpha::LDWU: case Alpha::LDBU:
- case Alpha::LDT: case Alpha::LDS:
- case Alpha::STQ: case Alpha::STL:
- case Alpha::STW: case Alpha::STB:
- case Alpha::STT: case Alpha::STS:
- if (MI->getOperand(2).getReg() == Alpha::R30) {
- if (prev[0] &&
- prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
- prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- Changed = true; nopintro += 1;
- count += 1;
- } else if (prev[1]
- && prev[1]->getOperand(2).getReg() ==
- MI->getOperand(2).getReg()
- && prev[1]->getOperand(1).getImm() ==
- MI->getOperand(1).getImm()) {
- prev[0] = prev[2];
- prev[1] = prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- Changed = true; nopintro += 2;
- count += 2;
- } else if (prev[2]
- && prev[2]->getOperand(2).getReg() ==
- MI->getOperand(2).getReg()
- && prev[2]->getOperand(1).getImm() ==
- MI->getOperand(1).getImm()) {
- prev[0] = prev[1] = prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- Changed = true; nopintro += 3;
- count += 3;
- }
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = MI;
- break;
- }
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- break;
- case Alpha::ALTENT:
- case Alpha::MEMLABEL:
- case Alpha::PCLABEL:
- --count;
- break;
- case Alpha::BR:
- case Alpha::JMP:
- ub = true;
- //fall through
- default:
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- break;
- }
- }
- if (ub || AlignAll) {
- //we can align stuff for free at this point
- while (count % 4) {
- BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- ++count;
- ++nopalign;
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- }
- }
- }
- return Changed;
- }
- };
- char AlphaLLRPPass::ID = 0;
-} // end of anonymous namespace
-
-FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
- return new AlphaLLRPPass(tm);
-}
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
deleted file mode 100644
index 186738c20c70..000000000000
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//====- AlphaMachineFunctionInfo.h - Alpha machine function info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares Alpha-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAMACHINEFUNCTIONINFO_H
-#define ALPHAMACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// AlphaMachineFunctionInfo - This class is derived from MachineFunction and
-/// contains private Alpha target-specific information for each MachineFunction.
-class AlphaMachineFunctionInfo : public MachineFunctionInfo {
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
- /// GlobalRetAddr - keeps track of the virtual register initialized for
- /// the return address value.
- unsigned GlobalRetAddr;
-
- /// VarArgsOffset - The offset to the first vararg
- int VarArgsOffset;
- /// VarArgsBase - The base FrameIndex for varargs
- int VarArgsBase;
-
-public:
- AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
- VarArgsOffset(0), VarArgsBase(0) {}
-
- explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
- GlobalRetAddr(0),
- VarArgsOffset(0),
- VarArgsBase(0) {}
-
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
- unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
- void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
-
- int getVarArgsOffset() const { return VarArgsOffset; }
- void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
-
- int getVarArgsBase() const { return VarArgsBase; }
- void setVarArgsBase(int Base) { VarArgsBase = Base; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
deleted file mode 100644
index 8b6230fa2a24..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ /dev/null
@@ -1,199 +0,0 @@
-//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "reginfo"
-#include "Alpha.h"
-#include "AlphaRegisterInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <cstdlib>
-
-#define GET_REGINFO_TARGET_DESC
-#include "AlphaGenRegisterInfo.inc"
-
-using namespace llvm;
-
-AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(Alpha::R26), TII(tii) {
-}
-
-static long getUpper16(long l) {
- long y = l / Alpha::IMM_MULT;
- if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
- ++y;
- return y;
-}
-
-static long getLower16(long l) {
- long h = getUpper16(l);
- return l - h * Alpha::IMM_MULT;
-}
-
-const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
- static const unsigned CalleeSavedRegs[] = {
- Alpha::R9, Alpha::R10,
- Alpha::R11, Alpha::R12,
- Alpha::R13, Alpha::R14,
- Alpha::F2, Alpha::F3,
- Alpha::F4, Alpha::F5,
- Alpha::F6, Alpha::F7,
- Alpha::F8, Alpha::F9, 0
- };
- return CalleeSavedRegs;
-}
-
-BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(Alpha::R15);
- Reserved.set(Alpha::R29);
- Reserved.set(Alpha::R30);
- Reserved.set(Alpha::R31);
- return Reserved;
-}
-
-//===----------------------------------------------------------------------===//
-// Stack Frame Processing methods
-//===----------------------------------------------------------------------===//
-
-void AlphaRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (TFI->hasFP(MF)) {
- // If we have a frame pointer, turn the adjcallstackdown instruction into
- // 'lda $30, -<amt>($30)' and the adjcallstackup instruction into
- // 'lda $30, <amt>($30)'.
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- MachineInstr *New;
- if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
- .addImm(-Amount).addReg(Alpha::R30);
- } else {
- assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
- .addImm(Amount).addReg(Alpha::R30);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
-//Alpha has a slightly funny stack:
-//Args
-//<- incoming SP
-//fixed locals (and spills, callee saved, etc)
-//<- FP
-//variable locals
-//<- SP
-
-void
-AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- bool FP = TFI->hasFP(MF);
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
-
- // Add the base register of R30 (SP) or R15 (FP).
- MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
-
- // Now add the frame object offset to the offset from the virtual frame index.
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
-
- DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
-
- Offset += MF.getFrameInfo()->getStackSize();
-
- DEBUG(errs() << "Corrected Offset " << Offset
- << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
-
- if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
- DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
- << Offset << "\n");
- //so in this case, we need to use a temporary register, and move the
- //original inst off the SP/FP
- //fix up the old:
- MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
- MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
- //insert the new
- MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
- TII.get(Alpha::LDAH), Alpha::R28)
- .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
- MBB.insert(II, nMI);
- } else {
- MI.getOperand(i).ChangeToImmediate(Offset);
- }
-}
-
-unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
-}
-
-unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
- return 0;
-}
-
-unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
- return 0;
-}
-
-std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
-{
- std::string s(AlphaRegDesc[reg].Name);
- return s;
-}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
deleted file mode 100644
index e35be273c7cd..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAREGISTERINFO_H
-#define ALPHAREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "AlphaGenRegisterInfo.inc"
-
-namespace llvm {
-
-class TargetInstrInfo;
-class Type;
-
-struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
- const TargetInstrInfo &TII;
-
- AlphaRegisterInfo(const TargetInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- // Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
- static std::string getPrettyName(unsigned reg);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
deleted file mode 100644
index 32120d750413..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.td
+++ /dev/null
@@ -1,133 +0,0 @@
-//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Alpha register set.
-//
-//===----------------------------------------------------------------------===//
-
-class AlphaReg<string n> : Register<n> {
- field bits<5> Num;
- let Namespace = "Alpha";
-}
-
-// We identify all our registers with a 5-bit ID, for consistency's sake.
-
-// GPR - One of the 32 64-bit general-purpose registers
-class GPR<bits<5> num, string n> : AlphaReg<n> {
- let Num = num;
-}
-
-// FPR - One of the 32 64-bit floating-point registers
-class FPR<bits<5> num, string n> : AlphaReg<n> {
- let Num = num;
-}
-
-//#define FP $15
-//#define RA $26
-//#define PV $27
-//#define GP $29
-//#define SP $30
-
-// General-purpose registers
-def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
-def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
-def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
-def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
-def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
-def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
-def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
-def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
-def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
-def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
-def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
-def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
-def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
-def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
-def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
-def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
-def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
-def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
-def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
-def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
-def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
-def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
-def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
-def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
-def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
-def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
-def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
-def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
-def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
-def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
-def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
-def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
-
-// Floating-point registers
-def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
-def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
-def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
-def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
-def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
-def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
-def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
-def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
-def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
-def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
-def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
-def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
-def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
-def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
-def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
-def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
-def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
-def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
-def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
-def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
-def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
-def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
-def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
-def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
-def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
-def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
-def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
-def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
-def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
-def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
-def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
-def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
-
- // //#define FP $15
- // //#define RA $26
- // //#define PV $27
- // //#define GP $29
- // //#define SP $30
- // $28 is undefined after any and all calls
-
-/// Register classes
-def GPRC : RegisterClass<"Alpha", [i64], 64, (add
- // Volatile
- R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
- R23, R24, R25, R28,
- //Special meaning, but volatile
- R27, //procedure address
- R26, //return address
- R29, //global offset table address
- // Non-volatile
- R9, R10, R11, R12, R13, R14,
-// Don't allocate 15, 30, 31
- R15, R30, R31)>; //zero
-
-def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
- // Saved:
- F2, F3, F4, F5, F6, F7, F8, F9,
- F31)>; //zero
-
-def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h
deleted file mode 100644
index 4c92045d4696..000000000000
--- a/lib/Target/Alpha/AlphaRelocations.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Alpha target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHARELOCATIONS_H
-#define ALPHARELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace Alpha {
- enum RelocationType {
- reloc_literal,
- reloc_gprellow,
- reloc_gprelhigh,
- reloc_gpdist,
- reloc_bsr
- };
- }
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
deleted file mode 100644
index 3703dd4fa9f6..000000000000
--- a/lib/Target/Alpha/AlphaSchedule.td
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//This is table 2-2 from the 21264 compiler writers guide
-//modified some
-
-//Pipelines
-
-def L0 : FuncUnit;
-def L1 : FuncUnit;
-def FST0 : FuncUnit;
-def FST1 : FuncUnit;
-def U0 : FuncUnit;
-def U1 : FuncUnit;
-def FA : FuncUnit;
-def FM : FuncUnit;
-
-def s_ild : InstrItinClass;
-def s_fld : InstrItinClass;
-def s_ist : InstrItinClass;
-def s_fst : InstrItinClass;
-def s_lda : InstrItinClass;
-def s_rpcc : InstrItinClass;
-def s_rx : InstrItinClass;
-def s_mxpr : InstrItinClass;
-def s_icbr : InstrItinClass;
-def s_ubr : InstrItinClass;
-def s_jsr : InstrItinClass;
-def s_iadd : InstrItinClass;
-def s_ilog : InstrItinClass;
-def s_ishf : InstrItinClass;
-def s_cmov : InstrItinClass;
-def s_imul : InstrItinClass;
-def s_imisc : InstrItinClass;
-def s_fbr : InstrItinClass;
-def s_fadd : InstrItinClass;
-def s_fmul : InstrItinClass;
-def s_fcmov : InstrItinClass;
-def s_fdivt : InstrItinClass;
-def s_fdivs : InstrItinClass;
-def s_fsqrts: InstrItinClass;
-def s_fsqrtt: InstrItinClass;
-def s_ftoi : InstrItinClass;
-def s_itof : InstrItinClass;
-def s_pseudo : InstrItinClass;
-
-//Table 2-4: Instruction Class Latency in Cycles,
-//slightly modified
-
-def Alpha21264Itineraries : ProcessorItineraries<
- [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
- InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
- InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
- InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
- InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
- InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
- InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
- InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
- InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
- InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
- InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
- InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
- InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
- InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
- InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
- InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
- InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
- InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
- InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
- InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
- InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
- InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
- InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
- InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
- InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
-]>;
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
deleted file mode 100644
index f1958fe6b5ad..000000000000
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AlphaSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-selectiondag-info"
-#include "AlphaTargetMachine.h"
-using namespace llvm;
-
-AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
-}
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
deleted file mode 100644
index 3405cc0cdedd..000000000000
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Alpha subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHASELECTIONDAGINFO_H
-#define ALPHASELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class AlphaTargetMachine;
-
-class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
- ~AlphaSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
deleted file mode 100644
index bd55ce9e315a..000000000000
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Alpha specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaSubtarget.h"
-#include "Alpha.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "AlphaGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
- : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(CPUName);
-}
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
deleted file mode 100644
index 70b311683f8b..000000000000
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Alpha specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHASUBTARGET_H
-#define ALPHASUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "AlphaGenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
-class AlphaSubtarget : public AlphaGenSubtargetInfo {
-protected:
-
- bool HasCT;
-
- InstrItineraryData InstrItins;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ///
- AlphaSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool hasCT() const { return HasCT; }
-};
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
deleted file mode 100644
index fc9a6771a900..000000000000
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-extern "C" void LLVMInitializeAlphaTarget() {
- // Register the target.
- RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
-}
-
-AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- DataLayout("e-f128:128:128-n64"),
- FrameLowering(Subtarget),
- Subtarget(TT, CPU, FS),
- TLInfo(*this),
- TSInfo(*this) {
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Pipeline Configuration
-//===----------------------------------------------------------------------===//
-
-bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createAlphaISelDag(*this));
- return false;
-}
-bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // Must run branch selection immediately preceding the asm printer
- PM.add(createAlphaBranchSelectionPass());
- PM.add(createAlphaLLRPPass(*this));
- return false;
-}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
deleted file mode 100644
index 48bb948a7945..000000000000
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Alpha-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_TARGETMACHINE_H
-#define ALPHA_TARGETMACHINE_H
-
-#include "AlphaInstrInfo.h"
-#include "AlphaISelLowering.h"
-#include "AlphaFrameLowering.h"
-#include "AlphaSelectionDAGInfo.h"
-#include "AlphaSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
-class GlobalValue;
-
-class AlphaTargetMachine : public LLVMTargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
- AlphaInstrInfo InstrInfo;
- AlphaFrameLowering FrameLowering;
- AlphaSubtarget Subtarget;
- AlphaTargetLowering TLInfo;
- AlphaSelectionDAGInfo TSInfo;
-
-public:
- AlphaTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const AlphaRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
- virtual const AlphaTargetLowering* getTargetLowering() const {
- return &TLInfo;
- }
- virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
-
- // Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
deleted file mode 100644
index a6d551618b9f..000000000000
--- a/lib/Target/Alpha/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Alpha.td)
-
-llvm_tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(AlphaGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(AlphaCommonTableGen)
-
-add_llvm_target(AlphaCodeGen
- AlphaAsmPrinter.cpp
- AlphaBranchSelector.cpp
- AlphaInstrInfo.cpp
- AlphaISelDAGToDAG.cpp
- AlphaISelLowering.cpp
- AlphaFrameLowering.cpp
- AlphaLLRP.cpp
- AlphaRegisterInfo.cpp
- AlphaSubtarget.cpp
- AlphaTargetMachine.cpp
- AlphaSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaCodeGen
- LLVMAlphaDesc
- LLVMAlphaInfo
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
deleted file mode 100644
index a35e8846e072..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the AlphaMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaMCAsmInfo.h"
-using namespace llvm;
-
-AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
- AlignmentIsInBytes = false;
- PrivateGlobalPrefix = "$";
- GPRel32Directive = ".gprel32";
- WeakRefDirective = "\t.weak\t";
- HasSetDirective = false;
-}
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
deleted file mode 100644
index 4ad021ca6761..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Alpha specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaMCTargetDesc.h"
-#include "AlphaMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "AlphaGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "AlphaGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "AlphaGenRegisterInfo.inc"
-
-using namespace llvm;
-
-
-static MCInstrInfo *createAlphaMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitAlphaMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitAlphaMCRegisterInfo(X, Alpha::R26);
- return X;
-}
-
-static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(Reloc::PIC_, CM);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget,
- createAlphaMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
- createAlphaMCSubtargetInfo);
-}
diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index f745ecbdb67f..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-add_llvm_library(LLVMAlphaDesc
- AlphaMCTargetDesc.cpp
- AlphaMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaDesc
- LLVMAlphaInfo
- LLVMMC
- )
-
-add_dependencies(LLVMAlphaDesc AlphaCommonTableGen)
diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile
deleted file mode 100644
index d55175fa69dc..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
deleted file mode 100644
index f48847a0627d..000000000000
--- a/lib/Target/Alpha/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMAlphaCodeGen
-TARGET = Alpha
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \
- AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
- AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
deleted file mode 100644
index cc170e313030..000000000000
--- a/lib/Target/Alpha/README.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-***
-
-add gcc builtins for alpha instructions
-
-
-***
-
-custom expand byteswap into nifty
-extract/insert/mask byte/word/longword/quadword low/high
-sequences
-
-***
-
-see if any of the extract/insert/mask operations can be added
-
-***
-
-match more interesting things for cmovlbc cmovlbs (move if low bit clear/set)
-
-***
-
-lower srem and urem
-
-remq(i,j): i - (j * divq(i,j)) if j != 0
-remqu(i,j): i - (j * divqu(i,j)) if j != 0
-reml(i,j): i - (j * divl(i,j)) if j != 0
-remlu(i,j): i - (j * divlu(i,j)) if j != 0
-
-***
-
-add crazy vector instructions (MVI):
-
-(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
-PKWB, UNPKBW pack/unpack word to byte
-PKLB UNPKBL pack/unpack long to byte
-PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
-
-cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
-
-this has some good examples for other operations that can be synthesised well
-from these rather meager vector ops (such as saturating add).
-http://www.alphalinux.org/docs/MVI-full.html
diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
deleted file mode 100644
index bdc69e788bcc..000000000000
--- a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-llvm::Target llvm::TheAlphaTarget;
-
-extern "C" void LLVMInitializeAlphaTargetInfo() {
- RegisterTarget<Triple::alpha, /*HasJIT=*/true>
- X(TheAlphaTarget, "alpha", "Alpha [experimental]");
-}
diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
deleted file mode 100644
index cac3178b789d..000000000000
--- a/lib/Target/Alpha/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMAlphaInfo
- AlphaTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMAlphaInfo AlphaCommonTableGen)
diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h
deleted file mode 100644
index a00ff4cc3275..000000000000
--- a/lib/Target/Blackfin/Blackfin.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Blackfin back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TARGET_BLACKFIN_H
-#define TARGET_BLACKFIN_H
-
-#include "MCTargetDesc/BlackfinMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
- class FunctionPass;
- class BlackfinTargetMachine;
-
- FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td
deleted file mode 100644
index cd90962a9540..000000000000
--- a/lib/Target/Blackfin/Blackfin.td
+++ /dev/null
@@ -1,202 +0,0 @@
-//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Blackfin Subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
- "Build for SDRAM">;
-
-def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
- "Assume instruction cache lookaside buffers are enabled at runtime">;
-
-//===----------------------------------------------------------------------===//
-// Bugs in the silicon become workarounds in the compiler.
-// See http://www.analog.com/ for the full list of IC anomalies.
-//===----------------------------------------------------------------------===//
-
-def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
- "Work around 05000074 - "
- "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
-
-def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
- "Work around 05000244 - "
- "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
-
-def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
- "Work around 05000245 - "
- "Access in the Shadow of a Conditional Branch">;
-
-def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
- "Work around 05000257 - "
- "Interrupt/Exception During Short Hardware Loop">;
-
-def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
- "Work around 05000283 - "
- "System MMR Write Is Stalled Indefinitely when Killed">;
-
-def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
- "Work around 05000312 - "
- "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
-
-def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
- "wa_killed_mmr", "true",
- "Work around 05000315 - "
- "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
-
-def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
- "Work around 05000371 - "
- "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
-
-def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
- "Work around 05000426 - "
- "Speculative Fetches of Indirect-Pointer Instructions">;
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "BlackfinRegisterInfo.td"
-include "BlackfinCallingConv.td"
-include "BlackfinIntrinsics.td"
-include "BlackfinInstrInfo.td"
-
-def BlackfinInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// Blackfin processors supported.
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
- : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
-
-def : Proc<"generic", "", []>;
-
-multiclass Core<string Name,string Suffix,
- list<SubtargetFeature> Features> {
- def : Proc<Name, Suffix, Features>;
- def : Proc<Name, "", Features>;
- def : Proc<Name, "-none", []>;
-}
-
-multiclass CoreEdinburgh<string Name>
- : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
- def : Proc<Name, "-0.5",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS]>;
- def : Proc<Name, "-0.4",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
-}
-multiclass CoreBraemar<string Name>
- : Core<Name, "-0.3",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
- def : Proc<Name, "-0.2",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreStirling<string Name>
- : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.4",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreMoab<string Name>
- : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.0",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreTeton<string Name>
- : Core<Name, "-0.5",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]> {
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreKookaburra<string Name>
- : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreMockingbird<string Name>
- : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
-}
-multiclass CoreBrodie<string Name>
- : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
-}
-
-defm BF512 : CoreBrodie<"bf512">;
-defm BF514 : CoreBrodie<"bf514">;
-defm BF516 : CoreBrodie<"bf516">;
-defm BF518 : CoreBrodie<"bf518">;
-defm BF522 : CoreMockingbird<"bf522">;
-defm BF523 : CoreKookaburra<"bf523">;
-defm BF524 : CoreMockingbird<"bf524">;
-defm BF525 : CoreKookaburra<"bf525">;
-defm BF526 : CoreMockingbird<"bf526">;
-defm BF527 : CoreKookaburra<"bf527">;
-defm BF531 : CoreEdinburgh<"bf531">;
-defm BF532 : CoreEdinburgh<"bf532">;
-defm BF533 : CoreEdinburgh<"bf533">;
-defm BF534 : CoreBraemar<"bf534">;
-defm BF536 : CoreBraemar<"bf536">;
-defm BF537 : CoreBraemar<"bf537">;
-defm BF538 : CoreStirling<"bf538">;
-defm BF539 : CoreStirling<"bf539">;
-defm BF542 : CoreMoab<"bf542">;
-defm BF544 : CoreMoab<"bf544">;
-defm BF548 : CoreMoab<"bf548">;
-defm BF549 : CoreMoab<"bf549">;
-defm BF561 : CoreTeton<"bf561">;
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-
-def Blackfin : Target {
- // Pull in Instruction Info:
- let InstructionSet = BlackfinInstrInfo;
-}
diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
deleted file mode 100644
index ed9844e1bee4..000000000000
--- a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Blackfin.h"
-#include "BlackfinInstrInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class BlackfinAsmPrinter : public AsmPrinter {
- public:
- BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "Blackfin Assembly Printer";
- }
-
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
- static const char *getRegisterName(unsigned RegNo);
-
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- };
-} // end of anonymous namespace
-
-#include "BlackfinGenAsmWriter.inc"
-
-extern "C" void LLVMInitializeBlackfinAsmPrinter() {
- RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
-}
-
-void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Virtual registers should be already mapped!");
- O << getRegisterName(MO.getReg());
- break;
-
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- printOffset(MO.getOffset(), O);
- break;
- case MachineOperand::MO_ExternalSymbol:
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
- << MO.getIndex();
- break;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- break;
- default:
- llvm_unreachable("<unknown operand type>");
- break;
- }
-}
-
-void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << " + ";
- printOperand(MI, opNum+1, O);
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
- case 'r':
- break;
- }
- }
-
- printOperand(MI, OpNo, O);
-
- return false;
-}
-
-bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
-
- O << '[';
- printOperand(MI, OpNo, O);
- O << ']';
-
- return false;
-}
diff --git a/lib/Target/Blackfin/BlackfinCallingConv.td b/lib/Target/Blackfin/BlackfinCallingConv.td
deleted file mode 100644
index 0abc84c3c405..000000000000
--- a/lib/Target/Blackfin/BlackfinCallingConv.td
+++ /dev/null
@@ -1,30 +0,0 @@
-//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the Blackfin architectures.
-//
-//===----------------------------------------------------------------------===//
-
-// Blackfin C Calling convention.
-def CC_Blackfin : CallingConv<[
- CCIfType<[i16], CCPromoteToType<i32>>,
- CCIfSRet<CCAssignToReg<[P0]>>,
- CCAssignToReg<[R0, R1, R2]>,
- CCAssignToStack<4, 4>
-]>;
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
-//===----------------------------------------------------------------------===//
-
-// Blackfin C return-value convention.
-def RetCC_Blackfin : CallingConv<[
- CCIfType<[i16], CCPromoteToType<i32>>,
- CCAssignToReg<[R0, R1]>
-]>;
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/lib/Target/Blackfin/BlackfinFrameLowering.cpp
deleted file mode 100644
index 0b0984d2f777..000000000000
--- a/lib/Target/Blackfin/BlackfinFrameLowering.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinFrameLowering.h"
-#include "BlackfinInstrInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) ||
- MFI->adjustsStack() || MFI->hasVarSizedObjects();
-}
-
-// Always reserve a call frame. We don't have enough registers to adjust SP.
-bool BlackfinFrameLowering::
-hasReservedCallFrame(const MachineFunction &MF) const {
- return true;
-}
-
-// Emit a prologue that sets up a stack frame.
-// On function entry, R0-R2 and P0 may hold arguments.
-// R3, P1, and P2 may be used as scratch registers
-void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const BlackfinInstrInfo &TII =
- *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
-
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- int FrameSize = MFI->getStackSize();
- if (FrameSize%4) {
- FrameSize = (FrameSize+3) & ~3;
- MFI->setStackSize(FrameSize);
- }
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
- return;
- }
-
- // emit a LINK instruction
- if (FrameSize <= 0x3ffff) {
- BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
- return;
- }
-
- // Frame is too big, do a manual LINK:
- // [--SP] = RETS;
- // [--SP] = FP;
- // FP = SP;
- // P1 = -FrameSize;
- // SP = SP + P1;
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::RETS, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::FP, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
- .addReg(BF::SP);
- RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
- .addReg(BF::SP, RegState::Kill)
- .addReg(BF::P1, RegState::Kill);
-
-}
-
-void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const BlackfinInstrInfo &TII =
- *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- DebugLoc dl = MBBI->getDebugLoc();
-
- int FrameSize = MFI->getStackSize();
- assert(FrameSize%4 == 0 && "Misaligned frame size");
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
- return;
- }
-
- // emit an UNLINK instruction
- BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
-}
-
-void BlackfinFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const TargetRegisterClass *RC = BF::DPRegisterClass;
-
- if (RegInfo->requiresRegisterScavenging(MF)) {
- // Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
-}
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h
deleted file mode 100644
index 169aa8e3011d..000000000000
--- a/lib/Target/Blackfin/BlackfinFrameLowering.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_FRAMEINFO_H
-#define BLACKFIN_FRAMEINFO_H
-
-#include "Blackfin.h"
-#include "BlackfinSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
- class BlackfinSubtarget;
-
-class BlackfinFrameLowering : public TargetFrameLowering {
-protected:
- const BlackfinSubtarget &STI;
-
-public:
- explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
- }
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
deleted file mode 100644
index 215ca43ea338..000000000000
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the Blackfin target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "BlackfinTargetMachine.h"
-#include "BlackfinRegisterInfo.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-/// BlackfinDAGToDAGISel - Blackfin specific code to select blackfin machine
-/// instructions for SelectionDAG operations.
-namespace {
- class BlackfinDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
- /// can make the right decision when generating code for different targets.
- //const BlackfinSubtarget &Subtarget;
- public:
- BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
-
- virtual void PostprocessISelDAG();
-
- virtual const char *getPassName() const {
- return "Blackfin DAG->DAG Pattern Instruction Selection";
- }
-
- // Include the pieces autogenerated from the target description.
-#include "BlackfinGenDAGISel.inc"
-
- private:
- SDNode *Select(SDNode *N);
- bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
-
- // Walk the DAG after instruction selection, fixing register class issues.
- void FixRegisterClasses(SelectionDAG &DAG);
-
- const BlackfinInstrInfo &getInstrInfo() {
- return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
- }
- const BlackfinRegisterInfo *getRegisterInfo() {
- return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
- }
- };
-} // end anonymous namespace
-
-FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new BlackfinDAGToDAGISel(TM, OptLevel);
-}
-
-void BlackfinDAGToDAGISel::PostprocessISelDAG() {
- FixRegisterClasses(*CurDAG);
-}
-
-SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
- return NULL; // Already selected.
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::FrameIndex: {
- // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp
- // SP, Px
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
- return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
- }
- }
-
- return SelectCode(N);
-}
-
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
- SDValue &Base,
- SDValue &Offset) {
- FrameIndexSDNode *FIN = 0;
- if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- if (Addr.getOpcode() == ISD::ADD) {
- ConstantSDNode *CN = 0;
- if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
- (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
- (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
- // Constant positive word offset from frame index
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
- return true;
- }
- }
- return false;
-}
-
-static inline bool isCC(const TargetRegisterClass *RC) {
- return BF::AnyCCRegClass.hasSubClassEq(RC);
-}
-
-static inline bool isDCC(const TargetRegisterClass *RC) {
- return BF::DRegClass.hasSubClassEq(RC) || isCC(RC);
-}
-
-static void UpdateNodeOperand(SelectionDAG &DAG,
- SDNode *N,
- unsigned Num,
- SDValue Val) {
- SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
- ops[Num] = Val;
- SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
- DAG.ReplaceAllUsesWith(N, New);
-}
-
-// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
-// choosing the proper register classes.
-void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
- const BlackfinInstrInfo &TII = getInstrInfo();
- const BlackfinRegisterInfo *TRI = getRegisterInfo();
- DAG.AssignTopologicalOrder();
- HandleSDNode Dummy(DAG.getRoot());
-
- for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
- NI != DAG.allnodes_end(); ++NI) {
- if (NI->use_empty() || !NI->isMachineOpcode())
- continue;
- const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
- for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
- if (!UI->isMachineOpcode())
- continue;
-
- if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
- continue;
- const TargetRegisterClass *DefRC =
- TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
-
- const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
- if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
- continue;
- const TargetRegisterClass *UseRC =
- TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
- if (!DefRC || !UseRC)
- continue;
- // We cannot copy CC <-> !(CC/D)
- if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
- SDNode *Copy =
- DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- NI->getDebugLoc(),
- MVT::i32,
- UI.getUse().get(),
- DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
- UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
- }
- }
- }
- DAG.setRoot(Dummy.getValue());
-}
-
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
deleted file mode 100644
index 7d4c45fdf665..000000000000
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ /dev/null
@@ -1,645 +0,0 @@
-//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that Blackfin uses to lower LLVM code
-// into a selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinISelLowering.h"
-#include "BlackfinTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- setStackPointerRegisterToSaveRestore(BF::SP);
- setIntDivIsCheap(false);
-
- // Set up the legal register classes.
- addRegisterClass(MVT::i32, BF::DRegisterClass);
- addRegisterClass(MVT::i16, BF::D16RegisterClass);
-
- computeRegisterProperties();
-
- // Blackfin doesn't have i1 loads or stores
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // i16 registers don't do much
- setOperationAction(ISD::AND, MVT::i16, Promote);
- setOperationAction(ISD::OR, MVT::i16, Promote);
- setOperationAction(ISD::XOR, MVT::i16, Promote);
- setOperationAction(ISD::CTPOP, MVT::i16, Promote);
- // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
- // immediately.
- setOperationAction(ISD::CTLZ, MVT::i16, Promote);
- setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- setOperationAction(ISD::SETCC, MVT::i16, Promote);
-
- // Blackfin has no division
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
-
- // No carry-in operations.
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
-
- // Blackfin has no intrinsics for these particular operations.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // i32 has native CTPOP, but not CTLZ/CTTZ
- setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
-
- // READCYCLECOUNTER needs special type legalization.
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
-
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
-
- // Use the default implementation.
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
-
- setMinFunctionAlignment(2);
-}
-
-const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case BFISD::CALL: return "BFISD::CALL";
- case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
- case BFISD::Wrapper: return "BFISD::Wrapper";
- }
-}
-
-EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
- // SETCC always sets the CC register. Technically that is an i1 register, but
- // that type is not legal, so we treat it as an i32 register.
- return MVT::i32;
-}
-
-SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-
- Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
- return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
-}
-
-SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- int JTI = cast<JumpTableSDNode>(Op)->getIndex();
-
- Op = DAG.getTargetJumpTable(JTI, MVT::i32);
- return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
-}
-
-SDValue
-BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
- CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- if (VA.isRegLoc()) {
- EVT RegVT = VA.getLocVT();
- TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
- BF::PRegisterClass : BF::DRegisterClass;
- assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
- assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
-
- unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- InVals.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
- unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo(),
- false, false, 0));
- }
- }
-
- return Chain;
-}
-
-SDValue
-BlackfinTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
-  // CCValAssign - represents the assignment of the return value to locations.
- SmallVector<CCValAssign, 16> RVLocs;
-
-  // CCState - Info about the registers and stack slots.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
-
-  // Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Opi = OutVals[i];
-
- // Expand to i32 if necessary
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- case CCValAssign::ZExt:
- Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- case CCValAssign::AExt:
- Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- }
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
- // Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode()) {
- return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- } else {
- return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
- }
-}
-
-SDValue
-BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // Blackfin target does not yet support tail call optimization.
- isTailCall = false;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
- CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
- CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
-
- // Get the size of the outgoing arguments stack space requirement.
- unsigned ArgsSize = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SmallVector<SDValue, 8> MemOpChains;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
-    // Arguments that can be passed in registers are collected in the
-    // RegsToPass vector.
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
- int Offset = VA.getLocMemOffset();
- assert(Offset%4 == 0 && "Unaligned LocMemOffset");
- assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type");
- SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
- SDValue OffsetN = DAG.getIntPtrConstant(Offset);
- OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
- MachinePointerInfo(),false, false, 0));
- }
- }
-
- // Transform all store nodes into one single node because
- // all store nodes are independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag is necessary since all emitted instructions must be
- // stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- // Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
-
- std::vector<EVT> NodeTys;
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- SDValue Ops[] = { Chain, Callee, InFlag };
- Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
- InFlag.getNode() ? 3 : 2);
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- InFlag = Chain.getValue(1);
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
-
- RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &RV = RVLocs[i];
- unsigned Reg = RV.getLocReg();
-
- Chain = DAG.getCopyFromReg(Chain, dl, Reg,
- RVLocs[i].getLocVT(), InFlag);
- SDValue Val = Chain.getValue(0);
- InFlag = Chain.getValue(2);
- Chain = Chain.getValue(1);
-
- // Callee is responsible for extending any i16 return values.
- switch (RV.getLocInfo()) {
- case CCValAssign::SExt:
- Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
- DAG.getValueType(RV.getValVT()));
- break;
- case CCValAssign::ZExt:
- Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
- DAG.getValueType(RV.getValVT()));
- break;
- default:
- break;
- }
-
- // Truncate to valtype
- if (RV.getLocInfo() != CCValAssign::Full)
- Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-// Expansion of ADDE / SUBE. This is a bit involved since Blackfin doesn't have
-// add-with-carry instructions.
-SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
- // Operands: lhs, rhs, carry-in (AC0 flag)
- // Results: sum, carry-out (AC0 flag)
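-  //
-  // Rough expansion, as implemented below:
-  //   t    = lhs op rhs      (AC0 = carry of the first op, saved via CC)
-  //   sum  = t   op carry-in (AC0 = carry of the second op)
-  //   AC0 |= saved carry     (combined carry-out)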
- DebugLoc dl = Op.getDebugLoc();
-
- unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB;
-
- // zext incoming carry flag in AC0 to 32 bits
- SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
- /* flag= */ Op.getOperand(2));
- CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
- SDValue(CarryIn, 0));
-
- // Add operands, produce sum and carry flag
- SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
- Op.getOperand(0), Op.getOperand(1));
-
- // Store intermediate carry from Sum
- SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
- /* flag= */ SDValue(Sum, 1));
-
- // Add incoming carry, again producing an output flag
- Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
- SDValue(Sum, 0), SDValue(CarryIn, 0));
-
- // Update AC0 with the intermediate carry, producing a flag.
- SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
- SDValue(Carry1, 0));
-
- // Compose (i32, flag) pair
- SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
- return DAG.getMergeValues(ops, 2, dl);
-}
-
-SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default:
- Op.getNode()->dump();
- llvm_unreachable("Should not custom lower this!");
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Blackfin.");
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- // Frame & Return address. Currently unimplemented
- case ISD::FRAMEADDR: return SDValue();
- case ISD::RETURNADDR: return SDValue();
- case ISD::ADDE:
- case ISD::SUBE: return LowerADDE(Op, DAG);
- }
-}
-
-void
-BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- DebugLoc dl = N->getDebugLoc();
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Do not know how to custom type legalize this operation!");
- return;
- case ISD::READCYCLECOUNTER: {
- // The low part of the cycle counter is in CYCLES, the high part in
- // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
- // CYCLES2 last.
- SDValue TheChain = N->getOperand(0);
- SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
- SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
- // Outgoing chain. If we were to use the chain from lo instead, it would be
- // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
- // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
- // read beyond the next CYCLES read, leading to invalid results.
- Results.push_back(hi.getValue(1));
- return;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Blackfin Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-BlackfinTargetLowering::ConstraintType
-BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() != 1)
- return TargetLowering::getConstraintType(Constraint);
-
- switch (Constraint[0]) {
- // Standard constraints
- case 'r':
- return C_RegisterClass;
-
- // Blackfin-specific constraints
- case 'a':
- case 'd':
- case 'z':
- case 'D':
- case 'W':
- case 'e':
- case 'b':
- case 'v':
- case 'f':
- case 'c':
- case 't':
- case 'u':
- case 'k':
- case 'x':
- case 'y':
- case 'w':
- return C_RegisterClass;
- case 'A':
- case 'B':
- case 'C':
- case 'Z':
- case 'Y':
- return C_Register;
- }
-
- // Not implemented: q0-q7, qA. Use {R2} etc instead
-
- return TargetLowering::getConstraintType(Constraint);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-BlackfinTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
-
- // Blackfin-specific constraints
- case 'a':
- case 'd':
- case 'z':
- case 'D':
- case 'W':
- case 'e':
- case 'b':
- case 'v':
- case 'f':
- case 'c':
- case 't':
- case 'u':
- case 'k':
- case 'x':
- case 'y':
- case 'w':
- return CW_Register;
- case 'A':
- case 'B':
- case 'C':
- case 'Z':
- case 'Y':
- return CW_SpecificReg;
- }
- return weight;
-}
-
-/// getRegForInlineAsmConstraint - Return the register number and class for a C_Register
-/// constraint.
-std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
- typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
- using namespace BF;
-
- if (Constraint.size() != 1)
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-
- switch (Constraint[0]) {
- // Standard constraints
- case 'r':
- return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
-
- // Blackfin-specific constraints
- case 'a': return Pair(0U, PRegisterClass);
- case 'd': return Pair(0U, DRegisterClass);
- case 'e': return Pair(0U, AccuRegisterClass);
- case 'A': return Pair(A0, AccuRegisterClass);
- case 'B': return Pair(A1, AccuRegisterClass);
- case 'b': return Pair(0U, IRegisterClass);
- case 'v': return Pair(0U, BRegisterClass);
- case 'f': return Pair(0U, MRegisterClass);
- case 'C': return Pair(CC, JustCCRegisterClass);
- case 'x': return Pair(0U, GRRegisterClass);
- case 'w': return Pair(0U, ALLRegisterClass);
- case 'Z': return Pair(P3, PRegisterClass);
- case 'Y': return Pair(P1, PRegisterClass);
- case 'z': return Pair(0U, zConsRegisterClass);
- case 'D': return Pair(0U, DConsRegisterClass);
- case 'W': return Pair(0U, WConsRegisterClass);
- case 'c': return Pair(0U, cConsRegisterClass);
- case 't': return Pair(0U, tConsRegisterClass);
- case 'u': return Pair(0U, uConsRegisterClass);
- case 'k': return Pair(0U, kConsRegisterClass);
- case 'y': return Pair(0U, yConsRegisterClass);
- }
-
- // Not implemented: q0-q7, qA. Use {R2} etc instead.
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-bool BlackfinTargetLowering::
-isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The Blackfin target isn't yet aware of offsets.
- return false;
-}
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
deleted file mode 100644
index 90908baaae9d..000000000000
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Blackfin uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_ISELLOWERING_H
-#define BLACKFIN_ISELLOWERING_H
-
-#include "llvm/Target/TargetLowering.h"
-#include "Blackfin.h"
-
-namespace llvm {
-
- namespace BFISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // A call instruction.
- RET_FLAG, // Return with a flag operand.
- Wrapper // Address wrapper
- };
- }
-
- class BlackfinTargetLowering : public TargetLowering {
- public:
- BlackfinTargetLowering(TargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
- virtual EVT getSetCCResultType(EVT VT) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
-
- ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- const char *getTargetNodeName(unsigned Opcode) const;
-
- private:
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
- };
-} // end namespace llvm
-
-#endif // BLACKFIN_ISELLOWERING_H
diff --git a/lib/Target/Blackfin/BlackfinInstrFormats.td b/lib/Target/Blackfin/BlackfinInstrFormats.td
deleted file mode 100644
index d8e6e252e787..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrFormats.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Instruction {
- field bits<32> Inst;
-
- let Namespace = "BF";
-
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let AsmString = asmstr;
- let Pattern = pattern;
-}
-
-// Single-word (16-bit) instructions
-class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern> {
-}
-
-// Double-word (32-bit) instructions
-class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern> {
-}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
deleted file mode 100644
index c06a919708d6..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinInstrInfo.h"
-#include "BlackfinSubtarget.h"
-#include "Blackfin.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_CTOR
-#include "BlackfinGenInstrInfo.inc"
-
-using namespace llvm;
-
-BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
- : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
- RI(ST, *this),
- Subtarget(ST) {}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case BF::LOAD32fi:
- case BF::LOAD16fi:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case BF::STORE32fi:
- case BF::STORE16fi:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned BlackfinInstrInfo::
-InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 1 || Cond.size() == 0) &&
- "Branch conditions have one component!");
-
- if (Cond.empty()) {
- // Unconditional branch?
- assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);
- return 1;
- }
-
- // Conditional branch.
- llvm_unreachable("Implement conditional branches!");
-}
-
-void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (BF::D16RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
- return;
- }
-
- if (BF::DRegClass.contains(DestReg)) {
- if (SrcReg == BF::NCC) {
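-      // NCC models the negation of CC, so copy CC into the data register and
-      // toggle bit 0 to materialize the inverted value.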
- BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
- return;
- }
- if (SrcReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- }
-
- if (BF::DRegClass.contains(SrcReg)) {
- if (DestReg == BF::NCC) {
- BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
- return;
- }
- if (DestReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- }
-
-
- if (DestReg == BF::NCC && SrcReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (DestReg == BF::CC && SrcReg == BF::NCC) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- llvm_unreachable("Bad reg-to-reg copy");
-}
-
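-// Returns true if Reg belongs to the register class Test. Physical registers
-// are tested directly; virtual registers are tested through their class RC.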
-static bool inClass(const TargetRegisterClass &Test,
- unsigned Reg,
- const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return Test.contains(Reg);
- else
- return Test.hasSubClassEq(RC);
-}
-
-void
-BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned SrcReg,
- bool isKill,
- int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
-
- if (inClass(BF::DPRegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE32fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::D16RegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE16fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE8fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
- RC->getName()).c_str());
-}
-
-void BlackfinInstrInfo::
-storeRegToAddr(MachineFunction &MF,
- unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("storeRegToAddr not implemented");
-}
-
-void
-BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- if (inClass(BF::DPRegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::D16RegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- llvm_unreachable("Cannot load regclass from stack slot");
-}
-
-void BlackfinInstrInfo::
-loadRegFromAddr(MachineFunction &MF,
- unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("loadRegFromAddr not implemented");
-}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
deleted file mode 100644
index d22ddf0d7313..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFININSTRUCTIONINFO_H
-#define BLACKFININSTRUCTIONINFO_H
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "BlackfinRegisterInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "BlackfinGenInstrInfo.inc"
-
-namespace llvm {
-
- class BlackfinInstrInfo : public BlackfinGenInstrInfo {
- const BlackfinRegisterInfo RI;
- const BlackfinSubtarget& Subtarget;
- public:
- explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned
- InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void storeRegToAddr(MachineFunction &MF,
- unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
deleted file mode 100644
index 5b59d7769c7e..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ /dev/null
@@ -1,862 +0,0 @@
-//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Blackfin instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "BlackfinInstrFormats.td"
-
-// These are target-independent nodes, but have target-specific formats.
-def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>;
-
-def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
-
-//===----------------------------------------------------------------------===//
-// Transformations
-//===----------------------------------------------------------------------===//
-
-def trailingZeros_xform : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
- MVT::i32);
-}]>;
-
-def trailingOnes_xform : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
- MVT::i32);
-}]>;
-
-def LO16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
-}]>;
-
-def HI16 : SDNodeXForm<imm, [{
-  // Transformation function: shift the high 16 bits of the immediate down into the low bits.
- return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
-}]>;
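-// For example, the constant 0x12345678 splits into LO16 = 0x5678 and
-// HI16 = 0x1234.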
-
-//===----------------------------------------------------------------------===//
-// Immediates
-//===----------------------------------------------------------------------===//
-
-def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
-def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
-def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
-def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
-
-def uimm5m2 : PatLeaf<(imm), [{
- uint64_t value = N->getZExtValue();
- return value % 2 == 0 && isUInt<5>(value);
-}]>;
-
-def uimm6m4 : PatLeaf<(imm), [{
- uint64_t value = N->getZExtValue();
- return value % 4 == 0 && isUInt<6>(value);
-}]>;
-
-def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
-def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
-def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
-
-def ximm16 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value < (1<<16) && value >= -(1<<15);
-}]>;
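-// ximm16 accepts any constant that fits in 16 bits, whether interpreted as
-// signed or unsigned (i.e. -32768 .. 65535).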
-
-def imm17m2 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value % 2 == 0 && isInt<17>(value);
-}]>;
-
-def imm18m4 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value % 4 == 0 && isInt<18>(value);
-}]>;
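-// The "mN" suffix marks immediates that must be a multiple of N; these match
-// the scaled offsets used by the halfword and word load/store forms below.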
-
-// 32-bit bitmask transformed to a bit number
-def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
- return isPowerOf2_32(N->getZExtValue());
-}], trailingZeros_xform>;
-
-// 32-bit inverse bitmask transformed to a bit number
-def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
- return isPowerOf2_32(~N->getZExtValue());
-}], trailingOnes_xform>;
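-// For example, the mask 0x00000020 is matched as bit number 5, and the
-// inverse mask 0xfffffffb (~(1 << 2)) is matched as bit number 2.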
-
-//===----------------------------------------------------------------------===//
-// Operands
-//===----------------------------------------------------------------------===//
-
-def calltarget : Operand<iPTR>;
-
-def brtarget : Operand<OtherVT>;
-
-// Addressing modes
-def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
-
-// Address operands
-def MEMii : Operand<i32> {
- let PrintMethod = "printMemoryOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-
-//===----------------------------------------------------------------------===//
-// Instructions
-//===----------------------------------------------------------------------===//
-
-// Pseudo instructions.
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern>;
-
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- "${:comment}ADJCALLSTACKDOWN $amt",
- [(BfinCallseqStart timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "${:comment}ADJCALLSTACKUP $amt1 $amt2",
- [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Table C-9. Program Flow Control Instructions
-//===----------------------------------------------------------------------===//
-
-let isBranch = 1, isTerminator = 1 in {
-
-let isIndirectBranch = 1 in
-def JUMPp : F1<(outs), (ins P:$target),
- "JUMP ($target);",
- [(brind P:$target)]>;
-
-// TODO JUMP (PC-P)
-
-// NOTE: assembler chooses between JUMP.S and JUMP.L
-def JUMPa : F1<(outs), (ins brtarget:$target),
- "jump $target;",
- [(br bb:$target)]>;
-
-def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
- "if $cc jump $target;",
- [(brcond AnyCC:$cc, bb:$target)]>;
-}
-
-let isCall = 1,
- Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
-def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
- "call $func;", []>;
-def CALLp: F1<(outs), (ins P:$func, variable_ops),
- "call ($func);", [(BfinCall P:$func)]>;
-}
-
-let isReturn = 1,
- isTerminator = 1,
- isBarrier = 1,
- Uses = [RETS] in
-def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-10. Load / Store Instructions
-//===----------------------------------------------------------------------===//
-
-// Immediate constant loads
-
-// sext immediate, i32 D/P regs
-def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
- "$dst = $src (x);",
- [(set DP:$dst, imm7:$src)]>;
-
-// zext immediate, i32 reg groups 0-3
-def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
- "$dst = $src (z);",
- [(set GR:$dst, uimm16:$src)]>;
-
-// sext immediate, i32 reg groups 0-3
-def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
- "$dst = $src (x);",
- [(set GR:$dst, imm16:$src)]>;
-
-// Pseudo-instruction for loading a general 32-bit constant.
-def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
- "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
- [(set GR:$dst, imm:$src)]>;
-
-def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
- "$dst.h = $src; $dst.l = $src;", []>;
-
-
-// 16-bit immediate, i16 reg groups 0-3
-def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
- "$dst = $src;", []>;
-
-def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
- (LOAD32sym tglobaladdr:$addr)>;
-
-def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
- (LOAD32sym tjumptable:$addr)>;
-
-// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
-// emit GR16 instructions. As a hack, we use this fake instruction instead.
-def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
- "$dst = $src;",
- [(set D16:$dst, ximm16:$src)]>;
-
-// Memory loads with patterns
-
-def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
- "$dst = [$ptr];",
- [(set DP:$dst, (load P:$ptr))]>;
-
-// Pseudo-instruction for loading a stack slot
-def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = [$mem];",
- [(set DP:$dst, (load ADDRspii:$mem))]>;
-
-// Note: Expands to multiple insns
-def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = [$mem];",
- [(set D16:$dst, (load ADDRspii:$mem))]>;
-
-// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
-// Replaced with Load D + CC=D
-def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = B[$mem];",
- [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
-
-def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = [$ptr + $off];",
- [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
-
-def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = [$ptr + $off];",
- [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
-
-def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = W[$ptr] (z);",
- [(set D:$dst, (zextloadi16 P:$ptr))]>;
-
-def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
-
-def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (z);",
- [(set D:$dst, (zextloadi16 (add P:$ptr,
- uimm5m2:$off)))]>;
-
-def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
- (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
-
-def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (z);",
- [(set D:$dst,
- (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
-
-def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
- (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
-
-def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = w[$ptr] (x);",
- [(set D:$dst, (sextloadi16 P:$ptr))]>;
-
-def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (x);",
- [(set D:$dst,
- (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
-
-def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (x);",
- [(set D:$dst,
- (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
-
-def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
- "$dst = w[$ptr];",
- [(set D16:$dst, (load PI:$ptr))]>;
-
-def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = B[$ptr] (z);",
- [(set D:$dst, (zextloadi8 P:$ptr))]>;
-
-def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
-def : Pat<(i16 (extloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
-def : Pat<(i16 (zextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
-
-def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = b[$ptr + $off] (z);",
- [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
-
-def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
- (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
-def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- lo16)>;
-def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- lo16)>;
-
-def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = b[$ptr] (x);",
- [(set D:$dst, (sextloadi8 P:$ptr))]>;
-
-def : Pat<(i16 (sextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
-
-def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = b[$ptr + $off] (x);",
- [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
-
-def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
- lo16)>;
-// Memory loads without patterns
-
-let mayLoad = 1 in {
-
-multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
- string mem="", string suf=";"> {
- def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
- !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
- def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
- !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
-}
-multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
- string mem="", string suf=";">
- : LOAD_incdec<drc, prc, mem, suf> {
- def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
- !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
-}
-
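-// For example, LOAD8z32p_inc below gets the asm string "$dst = b[$ptr++] (z);".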
-defm LOAD32p: LOAD_incdec<DP, P>;
-defm LOAD32i: LOAD_incdec<D, I>;
-defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
-defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
-defm LOADhi: LOAD_incdec<D16, I, "w">;
-defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
-defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
-
-def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
- "$dst = [$ptr ++ $off];", []>;
-
-// Note: $fp MUST be FP
-def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
- "$dst = [$fp - $off];", []>;
-
-def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
- "$dst = [$ptr];", []>;
-def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
- "$dst = [$ptr ++ $off];", []>;
-
-
-
-def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
- "$dst = w[$ptr ++ $off];", []>;
-
-
-}
-
-// Memory stores with patterns
-def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
- "[$ptr] = $val;",
- [(store DP:$val, P:$ptr)]>;
-
-// Pseudo-instructions for storing to a stack slot
-def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
- "${:comment}FI [$mem] = $val;",
- [(store DP:$val, ADDRspii:$mem)]>;
-
-// Note: This stack-storing pseudo-instruction is expanded to multiple insns
-def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
- "${:comment}FI [$mem] = $val;",
- [(store D16:$val, ADDRspii:$mem)]>;
-
-// Pseudo-instructions for storing AnyCC register to a stack slot.
-// Replaced with D=CC + STORE byte
-def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
- "${:comment}FI b[$mem] = $val;",
- [(store AnyCC:$val, ADDRspii:$mem)]>;
-
-def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
- "[$ptr + $off] = $val;",
- [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
-
-def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
- "[$ptr + $off] = $val;",
- [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
-
-def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
- "w[$ptr] = $val;",
- [(store D16:$val, PI:$ptr)]>;
-
-def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
- "b[$ptr] = $val;",
- [(truncstorei8 D:$val, P:$ptr)]>;
-
-def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
- "b[$ptr + $off] = $val;",
- [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
-
-let Constraints = "$ptr = $ptr_wb" in {
-
-multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
- int off=4, string pre=""> {
- def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
- !strconcat(pre, "[$ptr++] = $val;"),
- [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
- def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
- !strconcat(pre, "[$ptr--] = $val;"),
- [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
- (ineg off)))]>;
-}
-
-defm STORE32p: STORE_incdec<DP, P>;
-defm STORE16i: STORE_incdec<D16, I, 2, "w">;
-defm STORE8p: STORE_incdec<D, P, 1, "b">;
-
-def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
- "[$ptr ++ $off] = $val;",
- [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
-
-def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
- "w[$ptr ++ $off] = $val;",
- [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
-}
-
-// Memory stores without patterns
-
-let mayStore = 1 in {
-
-// Note: only works for $fp == FP
-def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
- "[$fp - $off] = $val;", []>;
-
-def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
- "[$ptr] = $val;", []>;
-
-def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
- "[$ptr++] = $val;", []>;
-
-def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
- "[$ptr--] = $val;", []>;
-
-def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
- "[$ptr ++ $off] = $val;", []>;
-}
-
-def : Pat<(truncstorei16 D:$val, PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- lo16), PI:$ptr)>;
-
-def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- hi16), PI:$ptr)>;
-
-def : Pat<(truncstorei8 D16L:$val, P:$ptr),
- (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
- lo16),
- P:$ptr)>;
-
-//===----------------------------------------------------------------------===//
-// Table C-11. Move Instructions.
-//===----------------------------------------------------------------------===//
-
-def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
- "$dst = $src;",
- []>;
-
-let Constraints = "$src1 = $dst" in
-def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
- "if $cc $dst = $src2;",
- [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
-
-let Defs = [AZ, AN, AC0, V] in {
-def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
- "$dst = $src (z);",
- [(set D:$dst, (zext D16L:$src))]>;
-
-def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
- "$dst = $src (x);",
- [(set D:$dst, (sext D16L:$src))]>;
-
-def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
- "$dst = $src.b (z);",
- [(set D:$dst, (and D:$src, 0xff))]>;
-
-def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
- "$dst = $src.b (x);",
- [(set D:$dst, (sext_inreg D:$src, i8))]>;
-
-}
-
-def : Pat<(sext_inreg D16L:$src, i8),
- (EXTRACT_SUBREG (MOVEsext8
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- D16L:$src,
- lo16)),
- lo16)>;
-
-def : Pat<(sext_inreg D:$src, i16),
- (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
-
-def : Pat<(and D:$src, 0xffff),
- (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
-
-def : Pat<(i32 (anyext D16L:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
- lo16)>;
-
-// TODO Dreg = Dreg_byte (X/Z)
-
-// TODO Accumulator moves
-
-//===----------------------------------------------------------------------===//
-// Table C-12. Stack Control Instructions
-//===----------------------------------------------------------------------===//
-
-let Uses = [SP], Defs = [SP] in {
-def PUSH: F1<(outs), (ins ALL:$src),
- "[--sp] = $src;", []> { let mayStore = 1; }
-
-// NOTE: POP does not work for DP regs, use LOAD instead
-def POP: F1<(outs ALL:$dst), (ins),
- "$dst = [sp++];", []> { let mayLoad = 1; }
-}
-
-// TODO: push/pop multiple
-
-def LINK: F2<(outs), (ins i32imm:$amount),
- "link $amount;", []>;
-
-def UNLINK: F2<(outs), (ins),
- "unlink;", []>;
-
-//===----------------------------------------------------------------------===//
-// Table C-13. Control Code Bit Management Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
- def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
-
- def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
-
- def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- []>;
-
- def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
-}
-
-defm SETEQ : SETCC<seteq, setne, "==">;
-defm SETLT : SETCC<setlt, setge, "<">;
-defm SETLE : SETCC<setle, setgt, "<=">;
-defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
-defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
-
-def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
- "cc = $a == $b;",
- [(set NotCC:$cc, (setne D:$a, D:$b))]>;
-
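-// Greater-than comparisons are synthesized by swapping the operands of the
-// corresponding less-than instructions.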
-def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
-def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
-def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
-def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
-
-// TODO: compare pointer for P-P comparisons
-// TODO: compare accumulator
-
-let Defs = [AC0] in
-def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
- "ac0 \\|= cc;", []>;
-
-let Uses = [AC0] in
-def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
- "cc = ac0;", []>;
-
-def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
- "cc = !cc;", []>;
-
-def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
- "cc = !cc;", []>;
-
-def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
- "$dst = $cc;", []>;
-
-def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
- "$dst = cc;", []>;
-
-def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
- "cc = $src;",
- [(set AnyCC:$cc, (setne D:$src, 0))]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-14. Logical Operations Instructions
-//===----------------------------------------------------------------------===//
-
-def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 & $src2;",
- [(set D:$dst, (and D:$src1, D:$src2))]>;
-
-def NOT: F1<(outs D:$dst), (ins D:$src),
- "$dst = ~$src;",
- [(set D:$dst, (not D:$src))]>;
-
-def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 \\| $src2;",
- [(set D:$dst, (or D:$src1, D:$src2))]>;
-
-def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 ^ $src2;",
- [(set D:$dst, (xor D:$src1, D:$src2))]>;
-
-// missing: BXOR, BXORSHIFT
-
-//===----------------------------------------------------------------------===//
-// Table C-15. Bit Operations Instructions
-//===----------------------------------------------------------------------===//
-
-let Constraints = "$src1 = $dst" in {
-def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
- "bitclr($dst, $src2);",
- [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
-
-def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
- "bitset($dst, $src2);",
- [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
-
-def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
- "bittgl($dst, $src2);",
- [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
-}
-
-def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
- "cc = bittst($src1, $src2);",
- [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
- (i32 0)))]>;
-
-def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
- "cc = !bittst($src1, $src2);",
- [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
- (i32 0)))]>;
-
-// TODO: DEPOSIT, EXTRACT, BITMUX
-
-def ONES: F2<(outs D16L:$dst), (ins D:$src),
- "$dst = ones $src;",
- [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
-
-def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
-
-//===----------------------------------------------------------------------===//
-// Table C-16. Shift / Rotate Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass SHIFT32<SDNode opnode, string ops> {
- def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
- !subst("XX", ops, "$dst XX= $amount;"),
- [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
- def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
- !subst("XX", ops, "$dst XX= $amount;"),
- [(set D:$dst, (opnode D:$src, D:$amount))]>;
-}
-
-let Defs = [AZ, AN, V, VS],
- Constraints = "$src = $dst" in {
-defm SRA : SHIFT32<sra, ">>>">;
-defm SRL : SHIFT32<srl, ">>">;
-defm SLL : SHIFT32<shl, "<<">;
-}
-
-// TODO: automatic switching between 2-addr and 3-addr (?)
-
-let Defs = [AZ, AN, V, VS] in {
-def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
- "$dst = lshift $src by $amount;",
- [(set D:$dst, (shl D:$src, D16L:$amount))]>;
-
-// Arithmetic left-shift = saturating overflow.
-def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
- "$dst = ashift $src by $amount;",
- [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
-
-def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src >>> $amount;",
- [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
-
-def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src >> $amount;",
- [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
-
-// Arithmetic left-shift = saturating overflow.
-def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
- "$dst = ashift $src BY $amount;",
- [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
-
-def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src << $amount;",
- [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
-
-def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
- "$dst = lshift $src by $amount;",
- [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Table C-17. Arithmetic Operations Instructions
-//===----------------------------------------------------------------------===//
-
-// TODO: ABS
-
-let Defs = [AZ, AN, AC0, V, VS] in {
-
-def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 + $src2;",
- [(set D:$dst, (add D:$src1, D:$src2))]>;
-
-def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 + $src2;",
- [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
-
-let Constraints = "$src1 = $dst" in
-def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
- "$dst += $src2;",
- [(set D:$dst, (add D:$src1, imm7:$src2))]>;
-
-def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 - $src2;",
- [(set D:$dst, (sub D:$src1, D:$src2))]>;
-
-def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 - $src2;",
- [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
-
-}
-
-def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
-def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
-
-let Defs = [AZ, AN, V, VS] in
-def NEG: F1<(outs D:$dst), (ins D:$src),
- "$dst = -$src;",
- [(set D:$dst, (ineg D:$src))]>;
-
-// No pattern; it would confuse isel to have two i32 = i32+i32 patterns.
-def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
- "$dst = $src1 + $src2;", []>;
-
-let Constraints = "$src1 = $dst" in
-def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
- "$dst += $src2;", []>;
-
-let Defs = [AZ, AN, V] in
-def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 + $src2 (rnd20);", []>;
-
-let Defs = [V, VS] in {
-def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
-
-def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (ih);",
- [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
-
-def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
-
-def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
-}
-
-
-let Constraints = "$src1 = $dst" in
-def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst *= $src2;",
- [(set D:$dst, (mul D:$src1, D:$src2))]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-18. External Event Management Instructions
-//===----------------------------------------------------------------------===//
-
-def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
-def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
-def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
-def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
-def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
-def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
-def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
-def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
-def NOP : F1<(outs), (ins), "nop;", []>;
-def MNOP : F2<(outs), (ins), "mnop;", []>;
-def ABORT : F1<(outs), (ins), "abort;", []>;
-
-//===----------------------------------------------------------------------===//
-// Table C-19. Cache Control Instructions
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Table C-20. Video Pixel Operations Instructions
-//===----------------------------------------------------------------------===//
-
-def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align8($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 24)),
- (srl D:$src2, (i32 8))))]>;
-
-def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align16($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 16)),
- (srl D:$src2, (i32 16))))]>;
-
-def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align16($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 8)),
- (srl D:$src2, (i32 24))))]>;
-
-def DISALGNEXCPT : F2<(outs), (ins), "disalignexcpt;", []>;
-
-// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
-// BYTEPACK, BYTEUNPACK
-
-// Table C-21. Vector Operations Instructions
-
-// Patterns
-def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
- (CALLa tglobaladdr:$dst)>;
-def : Pat<(BfinCall (i32 texternalsym:$dst)),
- (CALLa texternalsym:$dst)>;
-def : Pat<(i16 (trunc D:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
deleted file mode 100644
index 71356768dd5d..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of TargetIntrinsicInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinIntrinsicInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstring>
-
-using namespace llvm;
-
-namespace bfinIntrinsic {
-
- enum ID {
- last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_bfin_intrinsics
- };
-
-}
-
-std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
- unsigned numTys) const {
- static const char *const names[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_NAME_TABLE
- };
-
- assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
- if (IntrID < Intrinsic::num_intrinsics)
- return "";
- assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");
-
- std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
- return Result;
-}
-
-unsigned
-BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
- if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
- || Name[2] != 'v' || Name[3] != 'm')
- return 0; // All intrinsics start with 'llvm.'
-
-#define GET_FUNCTION_RECOGNIZER
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_FUNCTION_RECOGNIZER
- return 0;
-}
-
-bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
- // Overload Table
- const bool OTable[] = {
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- if (IntrID == 0)
- return false;
- else
- return OTable[IntrID - Intrinsic::num_intrinsics];
-}
-
-/// This defines the "getAttributes(ID id)" method.
-#define GET_INTRINSIC_ATTRIBUTES
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_ATTRIBUTES
-
-static FunctionType *getType(LLVMContext &Context, unsigned id) {
- Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
- bool IsVarArg = false;
-
-#define GET_INTRINSIC_GENERATOR
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_GENERATOR
-
- return FunctionType::get(ResultTy, ArgTys, IsVarArg);
-}
-
-Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTy) const {
- assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
- AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
- return cast<Function>(M->getOrInsertFunction(getName(IntrID),
- getType(M->getContext(), IntrID),
- AList));
-}
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
deleted file mode 100644
index f05db5ad7cd9..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of TargetIntrinsicInfo.
-//
-//===----------------------------------------------------------------------===//
-#ifndef BLACKFININTRINSICS_H
-#define BLACKFININTRINSICS_H
-
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
-
- class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
- public:
- std::string getName(unsigned IntrID, Type **Tys = 0,
- unsigned numTys = 0) const;
- unsigned lookupName(const char *Name, unsigned Len) const;
- bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
- unsigned numTys = 0) const;
- };
-
-}
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td
deleted file mode 100644
index ce21b082376f..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsics.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the Blackfin-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "bfin", isTarget = 1 in {
-
-//===----------------------------------------------------------------------===//
-// Core synchronisation etc.
-//
-// These intrinsics have side effects. Each represents a single instruction, but
-// workarounds are sometimes required depending on the CPU.
-
-// Execute csync instruction with workarounds
-def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
- Intrinsic<[]>;
-
-// Execute ssync instruction with workarounds
-def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
- Intrinsic<[]>;
-
-// Execute idle instruction with workarounds
-def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
- Intrinsic<[]>;
-
-}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
deleted file mode 100644
index 0d415c5f342b..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "BlackfinRegisterInfo.h"
-#include "BlackfinSubtarget.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "BlackfinGenRegisterInfo.inc"
-
-using namespace llvm;
-
-BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
- const TargetInstrInfo &tii)
- : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {}
-
-const unsigned*
-BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- using namespace BF;
- static const unsigned CalleeSavedRegs[] = {
- FP,
- R4, R5, R6, R7,
- P3, P4, P5,
- 0 };
- return CalleeSavedRegs;
-}
-
-BitVector
-BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- using namespace BF;
- BitVector Reserved(getNumRegs());
- Reserved.set(AZ);
- Reserved.set(AN);
- Reserved.set(AQ);
- Reserved.set(AC0);
- Reserved.set(AC1);
- Reserved.set(AV0);
- Reserved.set(AV0S);
- Reserved.set(AV1);
- Reserved.set(AV1S);
- Reserved.set(V);
- Reserved.set(VS);
- Reserved.set(CYCLES).set(CYCLES2);
- Reserved.set(L0);
- Reserved.set(L1);
- Reserved.set(L2);
- Reserved.set(L3);
- Reserved.set(SP);
- Reserved.set(RETS);
- if (TFI->hasFP(MF))
- Reserved.set(FP);
- return Reserved;
-}
-
-bool BlackfinRegisterInfo::
-requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
-// Emit instructions to add delta to D/P register. ScratchReg must be of the
-// same class as Reg (P).
-void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- unsigned ScratchReg,
- int delta) const {
- if (!delta)
- return;
- if (isInt<7>(delta)) {
- BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
- .addReg(Reg) // No kill on two-addr operand
- .addImm(delta);
- return;
- }
-
- // We must load delta into ScratchReg and add that.
- loadConstant(MBB, I, DL, ScratchReg, delta);
- if (BF::PRegClass.contains(Reg)) {
- assert(BF::PRegClass.contains(ScratchReg) &&
- "ScratchReg must be a P register");
- BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
- .addReg(Reg, RegState::Kill)
- .addReg(ScratchReg, RegState::Kill);
- } else {
- assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
- assert(BF::DRegClass.contains(ScratchReg) &&
- "ScratchReg must be a D register");
- BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
- .addReg(Reg, RegState::Kill)
- .addReg(ScratchReg, RegState::Kill);
- }
-}
-
-// Emit instructions to load a constant into D/P register
-void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- int value) const {
- if (isInt<7>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
- return;
- }
-
- if (isUInt<16>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
- return;
- }
-
- if (isInt<16>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
- return;
- }
-
- // We must split into halves
- BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
- .addImm((value >> 16) & 0xffff)
- .addReg(Reg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
- .addImm(value & 0xffff)
- .addReg(Reg, RegState::ImplicitKill)
- .addReg(Reg, RegState::ImplicitDefine);
-}
-
-void BlackfinRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
- if (Amount != 0) {
- assert(Amount%4 == 0 && "Unaligned call frame size");
- if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
- adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
- } else {
- assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
- "Unknown call frame pseudo instruction");
- adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static unsigned findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
-void
-BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned FIPos;
- for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
- assert(FIPos < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
- int FrameIndex = MI.getOperand(FIPos).getIndex();
- assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
- + MI.getOperand(FIPos+1).getImm();
- unsigned BaseReg = BF::FP;
- if (TFI->hasFP(MF)) {
- assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
- } else {
- BaseReg = BF::SP;
- Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
- }
-
- bool isStore = false;
-
- switch (MI.getOpcode()) {
- case BF::STORE32fi:
- isStore = true;
- case BF::LOAD32fi: {
- assert(Offset%4 == 0 && "Unaligned i32 stack access");
- assert(FIPos==1 && "Bad frame index operand");
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- if (isUInt<6>(Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32p_uimm6m4
- : BF::LOAD32p_uimm6m4));
- return;
- }
- if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32fp_nimm7m4
- : BF::LOAD32fp_nimm7m4));
- MI.getOperand(FIPos+1).setImm(-Offset);
- return;
- }
- if (isInt<18>(Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32p_imm18m4
- : BF::LOAD32p_imm18m4));
- return;
- }
- // Use RegScavenger to calculate proper offset...
- MI.dump();
- llvm_unreachable("Stack frame offset too big");
- break;
- }
- case BF::ADDpp: {
- assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
- unsigned DestReg = MI.getOperand(0).getReg();
- // We need to produce a stack offset in a P register. We emit:
- // P0 = offset;
- // P0 = BR + P0;
- assert(FIPos==1 && "Bad frame index operand");
- loadConstant(MBB, II, DL, DestReg, Offset);
- MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(2).ChangeToRegister(BaseReg, false);
- break;
- }
- case BF::STORE16fi:
- isStore = true;
- case BF::LOAD16fi: {
- assert(Offset%2 == 0 && "Unaligned i16 stack access");
- assert(FIPos==1 && "Bad frame index operand");
- // We need a P register to use as an address
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- loadConstant(MBB, II, DL, ScratchReg, Offset);
- BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(BaseReg);
- MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
- MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
- MI.RemoveOperand(2);
- break;
- }
- case BF::STORE8fi: {
- // This is an AnyCC spill, we need a scratch register.
- assert(FIPos==1 && "Bad frame index operand");
- MachineOperand SpillReg = MI.getOperand(0);
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- if (SpillReg.getReg()==BF::NCC) {
- BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
- .addOperand(SpillReg);
- BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
- .addReg(ScratchReg).addImm(0);
- } else {
- BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
- .addOperand(SpillReg);
- }
- // STORE D
- MI.setDesc(TII.get(BF::STORE8p_imm16));
- MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- break;
- }
- case BF::LOAD8fi: {
- // This is a restore; we need a scratch register.
- assert(FIPos==1 && "Bad frame index operand");
- MachineOperand SpillReg = MI.getOperand(0);
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
- MI.getOperand(0).ChangeToRegister(ScratchReg, true);
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- ++II;
- if (SpillReg.getReg()==BF::CC) {
- // CC = D
- BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
- .addReg(ScratchReg, RegState::Kill);
- } else {
- // Restore NCC (CC = D==0)
- BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(0);
- }
- break;
- }
- default:
- llvm_unreachable("Cannot eliminate frame index");
- break;
- }
-}
-
-unsigned
-BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- return TFI->hasFP(MF) ? BF::FP : BF::SP;
-}
-
-unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
- return 0;
-}
-
-unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
- return 0;
-}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
deleted file mode 100644
index 6ac22af793e0..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- BlackfinRegisterInfo.h - Blackfin Register Information ..-*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINREGISTERINFO_H
-#define BLACKFINREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "BlackfinGenRegisterInfo.inc"
-
-namespace llvm {
-
- class BlackfinSubtarget;
- class TargetInstrInfo;
- class Type;
-
- struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
- BlackfinSubtarget &Subtarget;
- const TargetInstrInfo &TII;
-
- BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- // getSubReg implemented by tablegen
-
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
- return &BF::PRegClass;
- }
-
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
- // Utility functions
- void adjustRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- unsigned ScratchReg,
- int delta) const;
- void loadConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- int value) const;
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
deleted file mode 100644
index 1c42205eb780..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ /dev/null
@@ -1,277 +0,0 @@
-//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the Blackfin register file
-//===----------------------------------------------------------------------===//
-
-// Subregs are:
-// 1: .L
-// 2: .H
-// 3: .W (32 low bits of 40-bit accu)
-let Namespace = "BF" in {
-def lo16 : SubRegIndex;
-def hi16 : SubRegIndex;
-def lo32 : SubRegIndex;
-def hi32 : SubRegIndex;
-}
-
-// Registers are identified with 3-bit group and 3-bit ID numbers.
-class BlackfinReg<string n> : Register<n> {
- field bits<3> Group;
- field bits<3> Num;
- let Namespace = "BF";
-}
-
-// Rc - 1-bit registers
-class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
- field bits<5> BitNum = bitno;
-}
-
-// Rs - 16-bit integer registers
-class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
- let Group = group;
- let Num = num;
- field bits<1> High = hi;
-}
-
-// Ri - 32-bit integer registers with subregs
-class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
- let Group = group;
- let Num = num;
-}
-
-// Ra 40-bit accumulator registers
-class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
- let SubRegs = subs;
- let SubRegIndices = [hi32, lo32];
- let Group = 4;
- let Num = num;
-}
-
-// Two halves of 32-bit register
-multiclass Rss<bits<3> group, bits<3> num, string n> {
- def H : Rs<group, num, 1, !strconcat(n, ".h")>;
- def L : Rs<group, num, 0, !strconcat(n, ".l")>;
-}
-
-// Rii - 32-bit integer registers with subregs
-class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
- : BlackfinReg<n> {
- let SubRegs = subs;
- let SubRegIndices = [hi16, lo16];
- let Group = group;
- let Num = num;
-}
-
-// Status bits are all part of ASTAT
-def AZ : Rc<0, "az">;
-def AN : Rc<1, "an">;
-def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
-def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
-def AQ : Rc<6, "aq">;
-def AC0 : Rc<12, "ac0">;
-def AC1 : Rc<13, "ac1">;
-def AV0 : Rc<16, "av0">;
-def AV0S : Rc<17, "av0s">;
-def AV1 : Rc<18, "av1">;
-def AV1S : Rc<19, "av1s">;
-def V : Rc<24, "v">;
-def VS : Rc<25, "vs">;
-// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
-
-// Group 0: Integer registers
-defm R0 : Rss<0, 0, "r0">;
-def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
-defm R1 : Rss<0, 1, "r1">;
-def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
-defm R2 : Rss<0, 2, "r2">;
-def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
-defm R3 : Rss<0, 3, "r3">;
-def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
-defm R4 : Rss<0, 4, "r4">;
-def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
-defm R5 : Rss<0, 5, "r5">;
-def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
-defm R6 : Rss<0, 6, "r6">;
-def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
-defm R7 : Rss<0, 7, "r7">;
-def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
-
-// Group 1: Pointer registers
-defm P0 : Rss<1, 0, "p0">;
-def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
-defm P1 : Rss<1, 1, "p1">;
-def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
-defm P2 : Rss<1, 2, "p2">;
-def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
-defm P3 : Rss<1, 3, "p3">;
-def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
-defm P4 : Rss<1, 4, "p4">;
-def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
-defm P5 : Rss<1, 5, "p5">;
-def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
-defm SP : Rss<1, 6, "sp">;
-def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
-defm FP : Rss<1, 7, "fp">;
-def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
-
-// Group 2: Index registers
-defm I0 : Rss<2, 0, "i0">;
-def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
-defm I1 : Rss<2, 1, "i1">;
-def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
-defm I2 : Rss<2, 2, "i2">;
-def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
-defm I3 : Rss<2, 3, "i3">;
-def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
-defm M0 : Rss<2, 4, "m0">;
-def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
-defm M1 : Rss<2, 5, "m1">;
-def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
-defm M2 : Rss<2, 6, "m2">;
-def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
-defm M3 : Rss<2, 7, "m3">;
-def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
-
-// Group 3: Cyclic indexing registers
-defm B0 : Rss<3, 0, "b0">;
-def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
-defm B1 : Rss<3, 1, "b1">;
-def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
-defm B2 : Rss<3, 2, "b2">;
-def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
-defm B3 : Rss<3, 3, "b3">;
-def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
-defm L0 : Rss<3, 4, "l0">;
-def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
-defm L1 : Rss<3, 5, "l1">;
-def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
-defm L2 : Rss<3, 6, "l2">;
-def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
-defm L3 : Rss<3, 7, "l3">;
-def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
-
-// Accumulators
-def A0X : Ri <4, 0, "a0.x">;
-defm A0 : Rss<4, 1, "a0">;
-def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
-def A0 : Ra <0, "a0", [A0X, A0W]>;
-
-def A1X : Ri <4, 2, "a1.x">;
-defm A1 : Rss<4, 3, "a1">;
-def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
-def A1 : Ra <2, "a1", [A1X, A1W]>;
-
-def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
-def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
-def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
-def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
-def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
-
-def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
- let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
-}
-
-def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
-def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
-def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
-def SYSCFG : Ri<7, 2, "syscfg">;
-def CYCLES : Ri<6, 6, "cycles">;
-def CYCLES2 : Ri<6, 7, "cycles2">;
-
-// Hardware loops
-def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
-def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
-def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
-def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
-def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
-def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
-
-// Register classes.
-def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
-
-def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
-
-def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
-
-def P16L : RegisterClass<"BF", [i16], 16,
- (add (sequence "P%uL", 0, 5), SPL, FPL)>;
-
-def P16H : RegisterClass<"BF", [i16], 16,
- (add (sequence "P%uH", 0, 5), SPH, FPH)>;
-
-def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
-
-def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
-
-def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
-
-def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
-
-def GR16 : RegisterClass<"BF", [i16], 16,
- (add DP16,
- I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
- M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
- B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
- L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
-
-def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
- let SubRegClasses = [(D16L lo16), (D16H hi16)];
-}
-
-def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
- let SubRegClasses = [(P16L lo16), (P16H hi16)];
-}
-
-def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
- let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
-}
-
-def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
-def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
-def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
-def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
-
-def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
-
-def ALL : RegisterClass<"BF", [i32], 32,
- (add GR,
- A0X, A0W, A1X, A1W, ASTAT, RETS,
- LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
- USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
-
-def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
-
-// We are going to pretend that CC and !CC are 32-bit registers, even though
-// they can only hold 1 bit.
-let CopyCost = -1, Size = 8 in {
-def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
-def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
-def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
-def StatBit : RegisterClass<"BF", [i1], 8,
- (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
-}
-
-// Should be i40, but that isn't defined. It is not a legal type yet anyway.
-def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
-
-// Register classes to match inline asm constraints.
-def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
-def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
-def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
-def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
- B0, B1, B2, B3,
- L0, L1, L2, L3)>;
-def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
-def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
-def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
-def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
- RETE, ASTAT, SEQSTAT,
- USP)>;
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
deleted file mode 100644
index a21f696a62eb..000000000000
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the BlackfinSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "blackfin-selectiondag-info"
-#include "BlackfinTargetMachine.h"
-using namespace llvm;
-
-BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
- const BlackfinTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
-}
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
deleted file mode 100644
index f1ce3482f90f..000000000000
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINSELECTIONDAGINFO_H
-#define BLACKFINSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class BlackfinTargetMachine;
-
-class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
- ~BlackfinSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
deleted file mode 100644
index 0bdce09177ed..000000000000
--- a/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//===- BlackfinSubtarget.cpp - BLACKFIN Subtarget Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Blackfin-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinSubtarget.h"
-#include "Blackfin.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "BlackfinGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
- icplb(false),
- wa_mi_shift(false),
- wa_csync(false),
- wa_specld(false),
- wa_mmr_stall(false),
- wa_lcregs(false),
- wa_hwloop(false),
- wa_ind_call(false),
- wa_killed_mmr(false),
- wa_rets(false)
-{
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-}
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
deleted file mode 100644
index 1a01a81116d6..000000000000
--- a/lib/Target/Blackfin/BlackfinSubtarget.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Blackfin-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_SUBTARGET_H
-#define BLACKFIN_SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "BlackfinGenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
- class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
- bool sdram;
- bool icplb;
- bool wa_mi_shift;
- bool wa_csync;
- bool wa_specld;
- bool wa_mmr_stall;
- bool wa_lcregs;
- bool wa_hwloop;
- bool wa_ind_call;
- bool wa_killed_mmr;
- bool wa_rets;
- public:
- BlackfinSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
deleted file mode 100644
index a4ae46b90fa0..000000000000
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinTargetMachine.h"
-#include "Blackfin.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeBlackfinTarget() {
- RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
-}
-
-BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
- Reloc::Model RM,
- CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- DataLayout("e-p:32:32-i64:32-f64:32-n32"),
- Subtarget(TT, CPU, FS),
- TLInfo(*this),
- TSInfo(*this),
- InstrInfo(Subtarget),
- FrameLowering(Subtarget) {
-}
-
-bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createBlackfinISelDag(*this, OptLevel));
- return false;
-}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
deleted file mode 100644
index c85337fe237f..000000000000
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Blackfin specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINTARGETMACHINE_H
-#define BLACKFINTARGETMACHINE_H
-
-#include "BlackfinInstrInfo.h"
-#include "BlackfinIntrinsicInfo.h"
-#include "BlackfinISelLowering.h"
-#include "BlackfinFrameLowering.h"
-#include "BlackfinSubtarget.h"
-#include "BlackfinSelectionDAGInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
- class BlackfinTargetMachine : public LLVMTargetMachine {
- const TargetData DataLayout;
- BlackfinSubtarget Subtarget;
- BlackfinTargetLowering TLInfo;
- BlackfinSelectionDAGInfo TSInfo;
- BlackfinInstrInfo InstrInfo;
- BlackfinFrameLowering FrameLowering;
- BlackfinIntrinsicInfo IntrinsicInfo;
- public:
- BlackfinTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const BlackfinSubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
- virtual const BlackfinRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
- virtual const BlackfinTargetLowering* getTargetLowering() const {
- return &TLInfo;
- }
- virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- const TargetIntrinsicInfo *getIntrinsicInfo() const {
- return &IntrinsicInfo;
- }
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
deleted file mode 100644
index 94d05fbf8878..000000000000
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Blackfin.td)
-
-llvm_tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
-add_public_tablegen_target(BlackfinCommonTableGen)
-
-add_llvm_target(BlackfinCodeGen
- BlackfinAsmPrinter.cpp
- BlackfinInstrInfo.cpp
- BlackfinIntrinsicInfo.cpp
- BlackfinISelDAGToDAG.cpp
- BlackfinISelLowering.cpp
- BlackfinFrameLowering.cpp
- BlackfinRegisterInfo.cpp
- BlackfinSubtarget.cpp
- BlackfinTargetMachine.cpp
- BlackfinSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinCodeGen
- LLVMAsmPrinter
- LLVMBlackfinDesc
- LLVMBlackfinInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
deleted file mode 100644
index 5b9d4a29794e..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the BlackfinMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinMCAsmInfo.h"
-
-using namespace llvm;
-
-BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
- GlobalPrefix = "_";
- CommentString = "//";
- HasSetDirective = false;
-}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
deleted file mode 100644
index c372aa247e04..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the BlackfinMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINTARGETASMINFO_H
-#define BLACKFINTARGETASMINFO_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
-
- struct BlackfinMCAsmInfo : public MCAsmInfo {
- explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
- };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
deleted file mode 100644
index 272e3c2bbb75..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Blackfin specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinMCTargetDesc.h"
-#include "BlackfinMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "BlackfinGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "BlackfinGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "BlackfinGenRegisterInfo.inc"
-
-using namespace llvm;
-
-
-static MCInstrInfo *createBlackfinMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitBlackfinMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitBlackfinMCRegisterInfo(X, BF::RETS);
- return X;
-}
-
-static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeBlackfinTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget,
- createBlackfinMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
- createBlackfinMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget,
- createBlackfinMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
- createBlackfinMCSubtargetInfo);
-}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
deleted file mode 100644
index 5bffe94fc582..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Blackfin specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINMCTARGETDESC_H
-#define BLACKFINMCTARGETDESC_H
-
-namespace llvm {
-class MCSubtargetInfo;
-class Target;
-class StringRef;
-
-extern Target TheBlackfinTarget;
-
-} // End llvm namespace
-
-// Defines symbolic names for Blackfin registers. This defines a mapping from
-// register name to register number.
-#define GET_REGINFO_ENUM
-#include "BlackfinGenRegisterInfo.inc"
-
-// Defines symbolic names for the Blackfin instructions.
-#define GET_INSTRINFO_ENUM
-#include "BlackfinGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "BlackfinGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 73315d852cbd..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-add_llvm_library(LLVMBlackfinDesc
- BlackfinMCTargetDesc.cpp
- BlackfinMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinDesc
- LLVMBlackfinInfo
- LLVMMC
- )
-
-add_dependencies(LLVMBlackfinDesc BlackfinCommonTableGen)
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
deleted file mode 100644
index 756ac6bcd8a0..000000000000
--- a/lib/Target/Blackfin/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMBlackfinCodeGen
-TARGET = Blackfin
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \
- BlackfinGenAsmWriter.inc \
- BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \
- BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/Blackfin/README.txt b/lib/Target/Blackfin/README.txt
deleted file mode 100644
index b4c8227cd645..000000000000
--- a/lib/Target/Blackfin/README.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===//
-
-* Condition codes
-** DONE Problem with asymmetric SETCC operations
-The instruction
-
- CC = R0 < 2
-
-is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC
-JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond
-(not cc), target); the DAG optimizer removes that kind of thing.
-
-This is handled by creating a pseudo-register NCC that aliases CC. Register
-classes JustCC and NotCC are used to control the inversion of CC.
-
-** DONE CC as an i32 register
-The AnyCC register class pretends to hold i32 values. It can only represent the
-values 0 and 1, but we can copy to and from the D class. This hack makes it
-possible to represent the setcc instruction without having i1 as a legal type.
-
-In most cases, the CC register is set by a "CC = .." or BITTST instruction, and
-then used in a conditional branch or move. The code generator thinks it is
-moving 32 bits, but the value stays in CC. In other cases, the result of a
-comparison is actually used as an i32 number, and CC will be copied to a D
-register.
-
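As a rough illustration (these fragments are not from the backend sources), the two cases above look like this in C: in the first function the comparison only feeds a conditional move or branch, so the value can stay in CC; in the second the i1 result is used as an ordinary integer, so CC has to be zero-extended into a D register.

  int cc_stays_in_cc(int a)  { return a < 2 ? 10 : 20; }  // CC only feeds a conditional move/branch
  int cc_becomes_i32(int a)  { return a < 2; }            // i1 result used as i32 -> copy CC to a D register
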
-* Stack frames
-** TODO Use Push/Pop instructions
-We should use the push/pop instructions when saving callee-saved
-registers. They are smaller, and we may even be able to use push-multiple instructions.
-
-** TODO requiresRegisterScavenging
-We need more intelligence in determining when the scavenger is needed. We
-should keep track of:
-- Spilling D16 registers
-- Spilling AnyCC registers
-
-* Assembler
-** TODO Implement PrintGlobalVariable
-** TODO Remove LOAD32sym
-It's a hack combining two instructions by concatenation.
-
-* Inline Assembly
-
-These are the GCC constraints from bfin/constraints.md:
-
-| Code | Register class | LLVM |
-|-------+-------------------------------------------+------|
-| a | P | C |
-| d | D | C |
-| z | Call clobbered P (P0, P1, P2) | X |
-| D | EvenD | X |
-| W | OddD | X |
-| e | Accu | C |
-| A | A0 | S |
-| B | A1 | S |
-| b | I | C |
-| v | B | C |
-| f | M | C |
-| c | Circular I, B, L | X |
-| C | JustCC | S |
-| t | LoopTop | X |
-| u | LoopBottom | X |
-| k | LoopCount | X |
-| x | GR | C |
-| y | RET*, ASTAT, SEQSTAT, USP | X |
-| w | ALL | C |
-| Z | The FD-PIC GOT pointer (P3) | S |
-| Y | The FD-PIC function pointer register (P1) | S |
-| q0-q7 | R0-R7 individually | |
-| qA | P0 | |
-|-------+-------------------------------------------+------|
-| Code | Constant | |
-|-------+-------------------------------------------+------|
-| J | 1<<N, N<32 | |
-| Ks3 | imm3 | |
-| Ku3 | uimm3 | |
-| Ks4 | imm4 | |
-| Ku4 | uimm4 | |
-| Ks5 | imm5 | |
-| Ku5 | uimm5 | |
-| Ks7 | imm7 | |
-| KN7 | -imm7 | |
-| Ksh | imm16 | |
-| Kuh | uimm16 | |
-| L | ~(1<<N) | |
-| M1 | 0xff | |
-| M2 | 0xffff | |
-| P0-P4 | 0-4 | |
-| PA | Macflag, not M | |
-| PB | Macflag, only M | |
-| Q | Symbol | |
-
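For illustration only (not part of the backend), the 'd' constraint from the table above can be exercised from C with GCC-style inline assembly; the asm string mirrors the "$dst = $src1 + $src2;" form of the ADD instruction earlier in this patch.

  static inline int add_in_dregs(int a, int b) {
    int r;
    __asm__("%0 = %1 + %2;" : "=d"(r) : "d"(a), "d"(b));  // all three operands kept in D registers
    return r;
  }
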
-** TODO Support all register classes
-* DAG combiner
-** Create test case for each Illegal SETCC case
-The DAG combiner may sometimes produce illegal i16 SETCC instructions.
-
-*** TODO SETCC (ctlz x), 5) == const
-*** TODO SETCC (and load, const) == const
-*** DONE SETCC (zext x) == const
-*** TODO SETCC (sext x) == const
-
-* Instruction selection
-** TODO Better immediate constants
-Like ARM, build constants as small imm + shift.
-
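A minimal sketch of that decomposition, under the assumption that the base value should fit the signed 7-bit LOADimm7 range; the helper below is hypothetical and only shows the split, not the instruction emission.

  #include <cstdint>

  // Return true if Val == Imm << Shift with Imm in [-64, 63] (i.e. isInt<7>(Imm)).
  static bool decomposeImmShift(int32_t Val, int32_t &Imm, unsigned &Shift) {
    if (Val == 0) { Imm = 0; Shift = 0; return true; }
    Shift = 0;
    while (!(Val & (1u << Shift)))     // find the lowest set bit
      ++Shift;
    Imm = Val >> Shift;                // arithmetic shift preserves the sign on common compilers
    return Imm >= -64 && Imm <= 63;
  }
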
-** TODO Implement cycle counter
-We have CYCLES and CYCLES2 registers, but the readcyclecounter intrinsic wants
-to return i64, and the code generator doesn't know how to legalize that.
-
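For reference, a hedged sketch of what the combined i64 value amounts to; read_cycles_lo/read_cycles_hi are hypothetical wrappers around CYCLES and CYCLES2, and the re-read of the high half is the generic guard against the low half wrapping between the two reads (it can be dropped if the hardware latches CYCLES2 when CYCLES is read).

  #include <cstdint>

  extern uint32_t read_cycles_lo();    // hypothetical wrapper for CYCLES
  extern uint32_t read_cycles_hi();    // hypothetical wrapper for CYCLES2

  uint64_t readCycleCounter64() {
    uint32_t hi = read_cycles_hi();
    uint32_t lo = read_cycles_lo();
    uint32_t hi2 = read_cycles_hi();
    if (hi != hi2) {                   // low half wrapped between the reads
      lo = read_cycles_lo();
      hi = hi2;
    }
    return ((uint64_t)hi << 32) | lo;
  }
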
-** TODO Instruction alternatives
-Some instructions come in different variants for example:
-
- D = D + D
- P = P + P
-
-Cross combinations are not allowed:
-
- P = D + D (bad)
-
-Similarly for the subreg pseudo-instructions:
-
- D16L = EXTRACT_SUBREG D16, bfin_subreg_lo16
- P16L = EXTRACT_SUBREG P16, bfin_subreg_lo16
-
-We want to take advantage of the alternative instructions. This could be done by
-changing the DAG after instruction selection.
-
-
-** Multipatterns for load/store
-We should try to identify multipatterns for load and store instructions. The
-available instruction matrix is a bit irregular.
-
-Loads:
-
-| Addr | D | P | D 16z | D 16s | D16 | D 8z | D 8s |
-|------------+---+---+-------+-------+-----+------+------|
-| P | * | * | * | * | * | * | * |
-| P++ | * | * | * | * | | * | * |
-| P-- | * | * | * | * | | * | * |
-| P+uimm5m2 | | | * | * | | | |
-| P+uimm6m4 | * | * | | | | | |
-| P+imm16 | | | | | | * | * |
-| P+imm17m2 | | | * | * | | | |
-| P+imm18m4 | * | * | | | | | |
-| P++P | * | | * | * | * | | |
-| FP-uimm7m4 | * | * | | | | | |
-| I | * | | | | * | | |
-| I++ | * | | | | * | | |
-| I-- | * | | | | * | | |
-| I++M | * | | | | | | |
-
-Stores:
-
-| Addr | D | P | D16H | D16L | D 8 |
-|------------+---+---+------+------+-----|
-| P | * | * | * | * | * |
-| P++ | * | * | | * | * |
-| P-- | * | * | | * | * |
-| P+uimm5m2 | | | | * | |
-| P+uimm6m4 | * | * | | | |
-| P+imm16 | | | | | * |
-| P+imm17m2 | | | | * | |
-| P+imm18m4 | * | * | | | |
-| P++P | * | | * | * | |
-| FP-uimm7m4 | * | * | | | |
-| I | * | | * | * | |
-| I++ | * | | * | * | |
-| I-- | * | | * | * | |
-| I++M | * | | | | |
-
-* Workarounds and features
-Blackfin CPUs have bugs. Each model comes in a number of silicon revisions with
-different bugs. We learn about the CPU model from the -mcpu switch.
-
-** Interpretation of -mcpu value
-- -mcpu=bf527 refers to the latest known BF527 revision
-- -mcpu=bf527-0.2 refers to silicon rev. 0.2
-- -mcpu=bf527-any refers to all known revisions
-- -mcpu=bf527-none disables all workarounds
-
-The -mcpu setting affects the __SILICON_REVISION__ macro and enabled workarounds:
-
-| -mcpu | __SILICON_REVISION__ | Workarounds |
-|------------+----------------------+--------------------|
-| bf527 | Def Latest | Specific to latest |
-| bf527-1.3 | Def 0x0103 | Specific to 1.3 |
-| bf527-any | Def 0xffff | All bf527-x.y |
-| bf527-none | Undefined | None |
-
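A hypothetical sketch (not from the backend) of mapping the revision suffix onto the macro values in the table above; a bare CPU name would additionally need a per-CPU "latest known revision" table, which is omitted here.

  #include <cstdio>
  #include <string>

  // "1.3" -> 0x0103, "0.2" -> 0x0002, "any" -> 0xffff, "none" -> -1 (macro left undefined).
  static int siliconRevisionFor(const std::string &RevSuffix) {
    if (RevSuffix == "any")  return 0xffff;
    if (RevSuffix == "none") return -1;
    unsigned Major, Minor;
    if (std::sscanf(RevSuffix.c_str(), "%u.%u", &Major, &Minor) == 2)
      return int((Major << 8) | Minor);
    return 0xffff;                     // unrecognised suffix: enable all workarounds
  }
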
-These are the known cores and revisions:
-
-| Core | Silicon | Processors |
-|-------------+--------------------+-------------------------|
-| Edinburgh | 0.3, 0.4, 0.5, 0.6 | BF531 BF532 BF533 |
-| Braemar | 0.2, 0.3 | BF534 BF536 BF537 |
-| Stirling | 0.3, 0.4, 0.5 | BF538 BF539 |
-| Moab | 0.0, 0.1, 0.2 | BF542 BF544 BF548 BF549 |
-| Teton | 0.3, 0.5 | BF561 |
-| Kookaburra | 0.0, 0.1, 0.2 | BF523 BF525 BF527 |
-| Mockingbird | 0.0, 0.1 | BF522 BF524 BF526 |
-| Brodie | 0.0, 0.1 | BF512 BF514 BF516 BF518 |
-
-
-** Compiler implemented workarounds
-Most workarounds are implemented in header files and source code using the
-__ADSPBF527__ macros. A few workarounds require compiler support.
-
-| Anomaly | Macro | GCC Switch |
-|----------+--------------------------------+------------------|
-| Any | __WORKAROUNDS_ENABLED | |
-| 05000074 | WA_05000074 | |
-| 05000244 | __WORKAROUND_SPECULATIVE_SYNCS | -mcsync-anomaly |
-| 05000245 | __WORKAROUND_SPECULATIVE_LOADS | -mspecld-anomaly |
-| 05000257 | WA_05000257 | |
-| 05000283 | WA_05000283 | |
-| 05000312 | WA_LOAD_LCREGS | |
-| 05000315 | WA_05000315 | |
-| 05000371 | __WORKAROUND_RETS | |
-| 05000426 | __WORKAROUND_INDIRECT_CALLS | Not -micplb |
-
-** GCC feature switches
-| Switch | Description |
-|---------------------------+----------------------------------------|
-| -msim | Use simulator runtime |
-| -momit-leaf-frame-pointer | Omit frame pointer for leaf functions |
-| -mlow64k | |
-| -mcsync-anomaly | |
-| -mspecld-anomaly | |
-| -mid-shared-library | |
-| -mleaf-id-shared-library | |
-| -mshared-library-id= | |
-| -msep-data | Enable separate data segment |
-| -mlong-calls | Use indirect calls |
-| -mfast-fp | |
-| -mfdpic | |
-| -minline-plt | |
-| -mstack-check-l1 | Do stack checking in L1 scratch memory |
-| -mmulticore | Enable multicore support |
-| -mcorea | Build for Core A |
-| -mcoreb | Build for Core B |
-| -msdram | Build for SDRAM |
-| -micplb | Assume ICPLBs are enabled at runtime. |
diff --git a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
deleted file mode 100644
index 57f1d3e95fbf..000000000000
--- a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target llvm::TheBlackfinTarget;
-
-extern "C" void LLVMInitializeBlackfinTargetInfo() {
- RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
- "Analog Devices Blackfin [experimental]");
-}
diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 771f092eb062..000000000000
--- a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMBlackfinInfo
- BlackfinTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMBlackfinInfo BlackfinCommonTableGen)
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
deleted file mode 100644
index 69d8c46a5024..000000000000
--- a/lib/Target/CBackend/CBackend.cpp
+++ /dev/null
@@ -1,3617 +0,0 @@
-//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library converts LLVM code to C code, compilable by GCC and other C
-// compilers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CTargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Config/config.h"
-#include <algorithm>
-// Some MS header decided to define setjmp as _setjmp; undo this for this file.
-#ifdef _MSC_VER
-#undef setjmp
-#endif
-using namespace llvm;
-
-extern "C" void LLVMInitializeCBackendTarget() {
- // Register the target.
- RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
-}
-
-namespace {
- class CBEMCAsmInfo : public MCAsmInfo {
- public:
- CBEMCAsmInfo() {
- GlobalPrefix = "";
- PrivateGlobalPrefix = "";
- }
- };
-
- /// CWriter - This class is the main chunk of code that converts an LLVM
- /// module to a C translation unit.
- class CWriter : public FunctionPass, public InstVisitor<CWriter> {
- formatted_raw_ostream &Out;
- IntrinsicLowering *IL;
- Mangler *Mang;
- LoopInfo *LI;
- const Module *TheModule;
- const MCAsmInfo* TAsm;
- const MCRegisterInfo *MRI;
- const MCObjectFileInfo *MOFI;
- MCContext *TCtx;
- const TargetData* TD;
-
- std::map<const ConstantFP *, unsigned> FPConstantMap;
- std::set<Function*> intrinsicPrototypesAlreadyGenerated;
- std::set<const Argument*> ByValParams;
- unsigned FPCounter;
- unsigned OpaqueCounter;
- DenseMap<const Value*, unsigned> AnonValueNumbers;
- unsigned NextAnonValueNumber;
-
- /// UnnamedStructIDs - This contains a unique ID for each struct that is
- /// either anonymous or has no name.
- DenseMap<StructType*, unsigned> UnnamedStructIDs;
-
- public:
- static char ID;
- explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
- TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
- OpaqueCounter(0), NextAnonValueNumber(0) {
- initializeLoopInfoPass(*PassRegistry::getPassRegistry());
- FPCounter = 0;
- }
-
- virtual const char *getPassName() const { return "C backend"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
-
- virtual bool doInitialization(Module &M);
-
- bool runOnFunction(Function &F) {
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- LI = &getAnalysis<LoopInfo>();
-
- // Get rid of intrinsics we can't handle.
- lowerIntrinsics(F);
-
- // Output all floating point constants that cannot be printed accurately.
- printFloatingPointConstants(F);
-
- printFunction(F);
- return false;
- }
-
- virtual bool doFinalization(Module &M) {
- // Free memory...
- delete IL;
- delete TD;
- delete Mang;
- delete TCtx;
- delete TAsm;
- delete MRI;
- delete MOFI;
- FPConstantMap.clear();
- ByValParams.clear();
- intrinsicPrototypesAlreadyGenerated.clear();
- UnnamedStructIDs.clear();
- return false;
- }
-
- raw_ostream &printType(raw_ostream &Out, Type *Ty,
- bool isSigned = false,
- const std::string &VariableName = "",
- bool IgnoreName = false,
- const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty,
- bool isSigned,
- const std::string &NameSoFar = "");
-
- void printStructReturnPointerFunctionType(raw_ostream &Out,
- const AttrListPtr &PAL,
- PointerType *Ty);
-
- std::string getStructName(StructType *ST);
-
- /// writeOperandDeref - Print the result of dereferencing the specified
- /// operand with '*'. This is equivalent to printing '*' then using
- /// writeOperand, but avoids excess syntax in some cases.
- void writeOperandDeref(Value *Operand) {
- if (isAddressExposed(Operand)) {
- // Already something with an address exposed.
- writeOperandInternal(Operand);
- } else {
- Out << "*(";
- writeOperand(Operand);
- Out << ")";
- }
- }
-
- void writeOperand(Value *Operand, bool Static = false);
- void writeInstComputationInline(Instruction &I);
- void writeOperandInternal(Value *Operand, bool Static = false);
- void writeOperandWithCast(Value* Operand, unsigned Opcode);
- void writeOperandWithCast(Value* Operand, const ICmpInst &I);
- bool writeInstructionCast(const Instruction &I);
-
- void writeMemoryAccess(Value *Operand, Type *OperandType,
- bool IsVolatile, unsigned Alignment);
-
- private :
- std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
-
- void lowerIntrinsics(Function &F);
- /// Prints the definition of the intrinsic function F. Supports the
- /// intrinsics which need to be explicitly defined in the CBackend.
- void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
-
- void printModuleTypes();
- void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &);
- void printFloatingPointConstants(Function &F);
- void printFloatingPointConstants(const Constant *C);
- void printFunctionSignature(const Function *F, bool Prototype);
-
- void printFunction(Function &);
- void printBasicBlock(BasicBlock *BB);
- void printLoop(Loop *L);
-
- void printCast(unsigned opcode, Type *SrcTy, Type *DstTy);
- void printConstant(Constant *CPV, bool Static);
- void printConstantWithCast(Constant *CPV, unsigned Opcode);
- bool printConstExprCast(const ConstantExpr *CE, bool Static);
- void printConstantArray(ConstantArray *CPA, bool Static);
- void printConstantVector(ConstantVector *CV, bool Static);
-
- /// isAddressExposed - Return true if the specified value's name needs to
- /// have its address taken in order to get a C value of the correct type.
- /// This happens for global variables, byval parameters, and direct allocas.
- bool isAddressExposed(const Value *V) const {
- if (const Argument *A = dyn_cast<Argument>(V))
- return ByValParams.count(A);
- return isa<GlobalVariable>(V) || isDirectAlloca(V);
- }
-
- // isInlinableInst - Attempt to inline instructions into their uses to build
- // trees as much as possible. To do this, we have to consistently decide
- // what is acceptable to inline, so that variable declarations don't get
- // printed and an extra copy of the expr is not emitted.
- //
- static bool isInlinableInst(const Instruction &I) {
- // Always inline cmp instructions, even if they are shared by multiple
- // expressions. GCC generates horrible code if we don't.
- if (isa<CmpInst>(I))
- return true;
-
- // Must be an expression, must be used exactly once. If it is dead, we
- // emit it inline where it would go.
- if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
- isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
- isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
- isa<InsertValueInst>(I))
- // Don't inline a load across a store or other bad things!
- return false;
-
- // Must not be used in inline asm, extractelement, or shufflevector.
- if (I.hasOneUse()) {
- const Instruction &User = cast<Instruction>(*I.use_back());
- if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
- isa<ShuffleVectorInst>(User))
- return false;
- }
-
-      // Only inline an instruction if its use is in the same BB as the inst.
- return I.getParent() == cast<Instruction>(I.use_back())->getParent();
- }
-
- // isDirectAlloca - Define fixed sized allocas in the entry block as direct
- // variables which are accessed with the & operator. This causes GCC to
- // generate significantly better code than to emit alloca calls directly.
- //
- static const AllocaInst *isDirectAlloca(const Value *V) {
- const AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI) return 0;
- if (AI->isArrayAllocation())
- return 0; // FIXME: we can also inline fixed size array allocas!
- if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
- return 0;
- return AI;
- }
-
- // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
- static bool isInlineAsm(const Instruction& I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- return isa<InlineAsm>(CI->getCalledValue());
- return false;
- }
-
- // Instruction visitation functions
- friend class InstVisitor<CWriter>;
-
- void visitReturnInst(ReturnInst &I);
- void visitBranchInst(BranchInst &I);
- void visitSwitchInst(SwitchInst &I);
- void visitIndirectBrInst(IndirectBrInst &I);
- void visitInvokeInst(InvokeInst &I) {
- llvm_unreachable("Lowerinvoke pass didn't work!");
- }
- void visitUnwindInst(UnwindInst &I) {
- llvm_unreachable("Lowerinvoke pass didn't work!");
- }
- void visitResumeInst(ResumeInst &I) {
- llvm_unreachable("DwarfEHPrepare pass didn't work!");
- }
- void visitUnreachableInst(UnreachableInst &I);
-
- void visitPHINode(PHINode &I);
- void visitBinaryOperator(Instruction &I);
- void visitICmpInst(ICmpInst &I);
- void visitFCmpInst(FCmpInst &I);
-
- void visitCastInst (CastInst &I);
- void visitSelectInst(SelectInst &I);
- void visitCallInst (CallInst &I);
- void visitInlineAsm(CallInst &I);
- bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
-
- void visitAllocaInst(AllocaInst &I);
- void visitLoadInst (LoadInst &I);
- void visitStoreInst (StoreInst &I);
- void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitVAArgInst (VAArgInst &I);
-
- void visitInsertElementInst(InsertElementInst &I);
- void visitExtractElementInst(ExtractElementInst &I);
- void visitShuffleVectorInst(ShuffleVectorInst &SVI);
-
- void visitInsertValueInst(InsertValueInst &I);
- void visitExtractValueInst(ExtractValueInst &I);
-
- void visitInstruction(Instruction &I) {
-#ifndef NDEBUG
- errs() << "C Writer does not know about " << I;
-#endif
- llvm_unreachable(0);
- }
-
- void outputLValue(Instruction *I) {
- Out << " " << GetValueName(I) << " = ";
- }
-
- bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
- void printPHICopiesForSuccessor(BasicBlock *CurBlock,
- BasicBlock *Successor, unsigned Indent);
- void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
- unsigned Indent);
- void printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static);
-
- std::string GetValueName(const Value *Operand);
- };
-}
-
-char CWriter::ID = 0;
-
-
-
-static std::string CBEMangle(const std::string &S) {
- std::string Result;
-
- for (unsigned i = 0, e = S.size(); i != e; ++i)
- if (isalnum(S[i]) || S[i] == '_') {
- Result += S[i];
- } else {
- Result += '_';
- Result += 'A'+(S[i]&15);
- Result += 'A'+((S[i]>>4)&15);
- Result += '_';
- }
- return Result;
-}
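For reference, the escape writes the low nibble first and then the high nibble,
each offset from 'A'; a quick sanity check of the scheme (illustrative only):

  // '.' is 0x2E: low nibble 0xE -> 'O', high nibble 0x2 -> 'C'
  assert(CBEMangle("foo.bar") == "foo_OC_bar");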
-
-std::string CWriter::getStructName(StructType *ST) {
- if (!ST->isLiteral() && !ST->getName().empty())
- return CBEMangle("l_"+ST->getName().str());
-
- return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
-}
-
-
-/// printStructReturnPointerFunctionType - This is like printType for a struct
-/// return type, except, instead of printing the type as void (*)(Struct*, ...)
-/// print it as "Struct (*)(...)", for struct return functions.
-void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
- const AttrListPtr &PAL,
- PointerType *TheTy) {
- FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
- FunctionInnards << " (*) (";
- bool PrintedType = false;
-
- FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
- Type *RetTy = cast<PointerType>(*I)->getElementType();
- unsigned Idx = 1;
- for (++I, ++Idx; I != E; ++I, ++Idx) {
- if (PrintedType)
- FunctionInnards << ", ";
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- PrintedType = true;
- }
- if (FTy->isVarArg()) {
- if (!PrintedType)
- FunctionInnards << " int"; //dummy argument for empty vararg functs
- FunctionInnards << ", ...";
- } else if (!PrintedType) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
-}
-
-raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned,
- const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
- "Invalid type for printSimpleType");
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return Out << "void " << NameSoFar;
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
- return Out << "bool " << NameSoFar;
- else if (NumBits <= 8)
- return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
- else if (NumBits <= 16)
- return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
- else if (NumBits <= 32)
- return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
- else if (NumBits <= 64)
- return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
- else {
- assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
- return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
- }
- }
- case Type::FloatTyID: return Out << "float " << NameSoFar;
- case Type::DoubleTyID: return Out << "double " << NameSoFar;
- // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
- // present matches host 'long double'.
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: return Out << "long double " << NameSoFar;
-
- case Type::X86_MMXTyID:
- return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
- " __attribute__((vector_size(64))) " + NameSoFar);
-
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return printSimpleType(Out, VTy->getElementType(), isSigned,
- " __attribute__((vector_size(" +
- utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
- }
-
- default:
-#ifndef NDEBUG
- errs() << "Unknown primitive type: " << *Ty << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-// Pass the Type* and the variable name and this prints out the variable
-// declaration.
-//
-raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
- bool isSigned, const std::string &NameSoFar,
- bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
- printSimpleType(Out, Ty, isSigned, NameSoFar);
- return Out;
- }
-
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- FunctionType *FTy = cast<FunctionType>(Ty);
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
- FunctionInnards << " (" << NameSoFar << ") (";
- unsigned Idx = 1;
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- if (I != FTy->param_begin())
- FunctionInnards << ", ";
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- ++Idx;
- }
- if (FTy->isVarArg()) {
- if (!FTy->getNumParams())
- FunctionInnards << " int"; //dummy argument for empty vaarg functs
- FunctionInnards << ", ...";
- } else if (!FTy->getNumParams()) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
- return Out;
- }
- case Type::StructTyID: {
- StructType *STy = cast<StructType>(Ty);
-
- // Check to see if the type is named.
- if (!IgnoreName)
- return Out << getStructName(STy) << ' ' << NameSoFar;
-
- Out << NameSoFar + " {\n";
- unsigned Idx = 0;
- for (StructType::element_iterator I = STy->element_begin(),
- E = STy->element_end(); I != E; ++I) {
- Out << " ";
- printType(Out, *I, false, "field" + utostr(Idx++));
- Out << ";\n";
- }
- Out << '}';
- if (STy->isPacked())
- Out << " __attribute__ ((packed))";
- return Out;
- }
-
- case Type::PointerTyID: {
- PointerType *PTy = cast<PointerType>(Ty);
- std::string ptrName = "*" + NameSoFar;
-
- if (PTy->getElementType()->isArrayTy() ||
- PTy->getElementType()->isVectorTy())
- ptrName = "(" + ptrName + ")";
-
- if (!PAL.isEmpty())
- // Must be a function ptr cast!
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
- return printType(Out, PTy->getElementType(), false, ptrName);
- }
-
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- unsigned NumElements = ATy->getNumElements();
- if (NumElements == 0) NumElements = 1;
- // Arrays are wrapped in structs to allow them to have normal
- // value semantics (avoiding the array "decay").
- Out << NameSoFar << " { ";
- printType(Out, ATy->getElementType(), false,
- "array[" + utostr(NumElements) + "]");
- return Out << "; }";
- }
-
- default:
- llvm_unreachable("Unhandled case in getTypeProps!");
- }
-
- return Out;
-}
-
-void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
-
- // As a special case, print the array as a string if it is an array of
- // ubytes or an array of sbytes with positive values.
- //
- Type *ETy = CPA->getType()->getElementType();
-  bool isString = ETy == Type::getInt8Ty(CPA->getContext());
-
- // Make sure the last character is a null char, as automatically added by C
- if (isString && (CPA->getNumOperands() == 0 ||
- !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
- isString = false;
-
- if (isString) {
- Out << '\"';
- // Keep track of whether the last number was a hexadecimal escape.
- bool LastWasHex = false;
-
- // Do not include the last character, which we know is null
- for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
- unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
-
-      // Print it out literally if it is a printable character. The only
-      // tricky case is when the previously printed character was a hex
-      // escape code: then we must not emit a hex digit literally, since the
-      // C compiler would treat it as a continuation of the escape sequence.
-      //
- if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
- LastWasHex = false;
- if (C == '"' || C == '\\')
- Out << "\\" << (char)C;
- else
- Out << (char)C;
- } else {
- LastWasHex = false;
- switch (C) {
- case '\n': Out << "\\n"; break;
- case '\t': Out << "\\t"; break;
- case '\r': Out << "\\r"; break;
- case '\v': Out << "\\v"; break;
- case '\a': Out << "\\a"; break;
- case '\"': Out << "\\\""; break;
- case '\'': Out << "\\\'"; break;
- default:
- Out << "\\x";
- Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
- Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
- LastWasHex = true;
- break;
- }
- }
- }
- Out << '\"';
- } else {
- Out << '{';
- if (CPA->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CPA->getOperand(0)), Static);
- for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CPA->getOperand(i)), Static);
- }
- }
- Out << " }";
- }
-}
-
-void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
- Out << '{';
- if (CP->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CP->getOperand(0)), Static);
- for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CP->getOperand(i)), Static);
- }
- }
- Out << " }";
-}
-
-// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
-// textually as a double (rather than as a reference to a stack-allocated
-// variable). We decide this by converting CFP to a string and back into a
-// double, and then checking whether the conversion results in a bit-equal
-// double to the original value of CFP. This depends on us and the target C
-// compiler agreeing on the conversion process (which is pretty likely since we
-// only deal in IEEE FP).
-//
-static bool isFPCSafeToPrint(const ConstantFP *CFP) {
- bool ignored;
- // Do long doubles in hex for now.
- if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
- CFP->getType() != Type::getDoubleTy(CFP->getContext()))
- return false;
- APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
-#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
- char Buffer[100];
- sprintf(Buffer, "%a", APF.convertToDouble());
- if (!strncmp(Buffer, "0x", 2) ||
- !strncmp(Buffer, "-0x", 3) ||
- !strncmp(Buffer, "+0x", 3))
- return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
- return false;
-#else
- std::string StrVal = ftostr(APF);
-
- while (StrVal[0] == ' ')
- StrVal.erase(StrVal.begin());
-
-  // Make sure the stringized number is not something like "Inf" or "NaN",
-  // i.e. that the string matches the "[-+]?[0-9]" regex.
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9')))
- // Reparse stringized version!
- return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
- return false;
-#endif
-}
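Stripped of the APFloat machinery, the round-trip test above amounts to
something like the following standalone sketch (not part of the backend):

  #include <cstdio>
  #include <cstdlib>
  #include <cstring>

  // True if printing V and reparsing the text reproduces the exact bit
  // pattern, i.e. the textual spelling is faithful.
  static bool roundTripsExactly(double V) {
    char Buf[64];
    std::snprintf(Buf, sizeof(Buf), "%a", V); // C99 hex float, exact for finite values
    double Back = std::strtod(Buf, 0);
    return std::memcmp(&V, &Back, sizeof(double)) == 0;
  }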
-
-/// Print out the casting for a cast operation. This emits the double cast
-/// needed for conversion to the destination type, when one is required.
-/// @brief Print a cast
-void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
- // Print the destination type cast
- switch (opc) {
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::IntToPtr:
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::FPExt:
- case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
- Out << '(';
- printType(Out, DstTy);
- Out << ')';
- break;
- case Instruction::ZExt:
- case Instruction::PtrToInt:
- case Instruction::FPToUI: // For these, make sure we get an unsigned dest
- Out << '(';
- printSimpleType(Out, DstTy, false);
- Out << ')';
- break;
- case Instruction::SExt:
- case Instruction::FPToSI: // For these, make sure we get a signed dest
- Out << '(';
- printSimpleType(Out, DstTy, true);
- Out << ')';
- break;
- default:
- llvm_unreachable("Invalid cast opcode");
- }
-
- // Print the source type cast
- switch (opc) {
- case Instruction::UIToFP:
- case Instruction::ZExt:
- Out << '(';
- printSimpleType(Out, SrcTy, false);
- Out << ')';
- break;
- case Instruction::SIToFP:
- case Instruction::SExt:
- Out << '(';
- printSimpleType(Out, SrcTy, true);
- Out << ')';
- break;
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- // Avoid "cast to pointer from integer of different size" warnings
- Out << "(unsigned long)";
- break;
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::FPExt:
- case Instruction::FPTrunc:
- case Instruction::FPToSI:
- case Instruction::FPToUI:
- break; // These don't need a source cast.
- default:
- llvm_unreachable("Invalid cast opcode");
- break;
- }
-}
-
-// printConstant - The LLVM Constant to C Constant converter.
-void CWriter::printConstant(Constant *CPV, bool Static) {
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
- switch (CE->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- Out << "(";
- printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
- if (CE->getOpcode() == Instruction::SExt &&
- CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
- // Make sure we really sext from bool here by subtracting from 0
- Out << "0-";
- }
- printConstant(CE->getOperand(0), Static);
- if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
- (CE->getOpcode() == Instruction::Trunc ||
- CE->getOpcode() == Instruction::FPToUI ||
- CE->getOpcode() == Instruction::FPToSI ||
- CE->getOpcode() == Instruction::PtrToInt)) {
- // Make sure we really truncate to bool here by anding with 1
- Out << "&1u";
- }
- Out << ')';
- return;
-
- case Instruction::GetElementPtr:
- Out << "(";
- printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
- gep_type_end(CPV), Static);
- Out << ")";
- return;
- case Instruction::Select:
- Out << '(';
- printConstant(CE->getOperand(0), Static);
- Out << '?';
- printConstant(CE->getOperand(1), Static);
- Out << ':';
- printConstant(CE->getOperand(2), Static);
- Out << ')';
- return;
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::ICmp:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- {
- Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
- printConstantWithCast(CE->getOperand(0), CE->getOpcode());
- switch (CE->getOpcode()) {
- case Instruction::Add:
- case Instruction::FAdd: Out << " + "; break;
- case Instruction::Sub:
- case Instruction::FSub: Out << " - "; break;
- case Instruction::Mul:
- case Instruction::FMul: Out << " * "; break;
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem: Out << " % "; break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv: Out << " / "; break;
- case Instruction::And: Out << " & "; break;
- case Instruction::Or: Out << " | "; break;
- case Instruction::Xor: Out << " ^ "; break;
- case Instruction::Shl: Out << " << "; break;
- case Instruction::LShr:
- case Instruction::AShr: Out << " >> "; break;
- case Instruction::ICmp:
- switch (CE->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << " == "; break;
- case ICmpInst::ICMP_NE: Out << " != "; break;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_ULT: Out << " < "; break;
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_ULE: Out << " <= "; break;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_UGT: Out << " > "; break;
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_UGE: Out << " >= "; break;
- default: llvm_unreachable("Illegal ICmp predicate");
- }
- break;
- default: llvm_unreachable("Illegal opcode here!");
- }
- printConstantWithCast(CE->getOperand(1), CE->getOpcode());
- if (NeedsClosingParens)
- Out << "))";
- Out << ')';
- return;
- }
- case Instruction::FCmp: {
- Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
- if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
- Out << "0";
- else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
- Out << "1";
- else {
- const char* op = 0;
- switch (CE->getPredicate()) {
- default: llvm_unreachable("Illegal FCmp predicate");
- case FCmpInst::FCMP_ORD: op = "ord"; break;
- case FCmpInst::FCMP_UNO: op = "uno"; break;
- case FCmpInst::FCMP_UEQ: op = "ueq"; break;
- case FCmpInst::FCMP_UNE: op = "une"; break;
- case FCmpInst::FCMP_ULT: op = "ult"; break;
- case FCmpInst::FCMP_ULE: op = "ule"; break;
- case FCmpInst::FCMP_UGT: op = "ugt"; break;
- case FCmpInst::FCMP_UGE: op = "uge"; break;
- case FCmpInst::FCMP_OEQ: op = "oeq"; break;
- case FCmpInst::FCMP_ONE: op = "one"; break;
- case FCmpInst::FCMP_OLT: op = "olt"; break;
- case FCmpInst::FCMP_OLE: op = "ole"; break;
- case FCmpInst::FCMP_OGT: op = "ogt"; break;
- case FCmpInst::FCMP_OGE: op = "oge"; break;
- }
- Out << "llvm_fcmp_" << op << "(";
- printConstantWithCast(CE->getOperand(0), CE->getOpcode());
- Out << ", ";
- printConstantWithCast(CE->getOperand(1), CE->getOpcode());
- Out << ")";
- }
- if (NeedsClosingParens)
- Out << "))";
- Out << ')';
- return;
- }
- default:
-#ifndef NDEBUG
- errs() << "CWriter Error: Unhandled constant expression: "
- << *CE << "\n";
-#endif
- llvm_unreachable(0);
- }
- } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
- Out << "((";
- printType(Out, CPV->getType()); // sign doesn't matter
- Out << ")/*UNDEF*/";
- if (!CPV->getType()->isVectorTy()) {
- Out << "0)";
- } else {
- Out << "{})";
- }
- return;
- }
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- Type* Ty = CI->getType();
- if (Ty == Type::getInt1Ty(CPV->getContext()))
- Out << (CI->getZExtValue() ? '1' : '0');
- else if (Ty == Type::getInt32Ty(CPV->getContext()))
- Out << CI->getZExtValue() << 'u';
- else if (Ty->getPrimitiveSizeInBits() > 32)
- Out << CI->getZExtValue() << "ull";
- else {
- Out << "((";
- printSimpleType(Out, Ty, false) << ')';
- if (CI->isMinValue(true))
- Out << CI->getZExtValue() << 'u';
- else
- Out << CI->getSExtValue();
- Out << ')';
- }
- return;
- }
-
- switch (CPV->getType()->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: {
- ConstantFP *FPC = cast<ConstantFP>(CPV);
- std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
- if (I != FPConstantMap.end()) {
- // Because of FP precision problems we must load from a stack allocated
- // value that holds the value in hex.
- Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
- "float" :
- FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
- "double" :
- "long double")
- << "*)&FPConstant" << I->second << ')';
- } else {
- double V;
- if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
- V = FPC->getValueAPF().convertToFloat();
- else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
- V = FPC->getValueAPF().convertToDouble();
- else {
- // Long double. Convert the number to double, discarding precision.
- // This is not awesome, but it at least makes the CBE output somewhat
- // useful.
- APFloat Tmp = FPC->getValueAPF();
- bool LosesInfo;
- Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
- V = Tmp.convertToDouble();
- }
-
- if (IsNAN(V)) {
- // The value is NaN
-
- // FIXME the actual NaN bits should be emitted.
- // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
- // it's 0x7ff4.
- const unsigned long QuietNaN = 0x7ff8UL;
- //const unsigned long SignalNaN = 0x7ff4UL;
-
- // We need to grab the first part of the FP #
- char Buffer[100];
-
- uint64_t ll = DoubleToBits(V);
- sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
-
- std::string Num(&Buffer[0], &Buffer[6]);
- unsigned long Val = strtoul(Num.c_str(), 0, 16);
-
- if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
- Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
- << Buffer << "\") /*nan*/ ";
- else
- Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
- << Buffer << "\") /*nan*/ ";
- } else if (IsInf(V)) {
- // The value is Inf
- if (V < 0) Out << '-';
- Out << "LLVM_INF" <<
- (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
- << " /*inf*/ ";
- } else {
- std::string Num;
-#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
- // Print out the constant as a floating point number.
- char Buffer[100];
- sprintf(Buffer, "%a", V);
- Num = Buffer;
-#else
- Num = ftostr(FPC->getValueAPF());
-#endif
- Out << Num;
- }
- }
- break;
- }
-
- case Type::ArrayTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- Out << "{ "; // Arrays are wrapped in struct types.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
- printConstantArray(CA, Static);
- } else {
- assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- ArrayType *AT = cast<ArrayType>(CPV->getType());
- Out << '{';
- if (AT->getNumElements()) {
- Out << ' ';
- Constant *CZ = Constant::getNullValue(AT->getElementType());
- printConstant(CZ, Static);
- for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(CZ, Static);
- }
- }
- Out << " }";
- }
- Out << " }"; // Arrays are wrapped in struct types.
- break;
-
- case Type::VectorTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
- printConstantVector(CV, Static);
- } else {
- assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- VectorType *VT = cast<VectorType>(CPV->getType());
- Out << "{ ";
- Constant *CZ = Constant::getNullValue(VT->getElementType());
- printConstant(CZ, Static);
- for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(CZ, Static);
- }
- Out << " }";
- }
- break;
-
- case Type::StructTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
- StructType *ST = cast<StructType>(CPV->getType());
- Out << '{';
- if (ST->getNumElements()) {
- Out << ' ';
- printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
- for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
- }
- }
- Out << " }";
- } else {
- Out << '{';
- if (CPV->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CPV->getOperand(0)), Static);
- for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CPV->getOperand(i)), Static);
- }
- }
- Out << " }";
- }
- break;
-
- case Type::PointerTyID:
- if (isa<ConstantPointerNull>(CPV)) {
- Out << "((";
- printType(Out, CPV->getType()); // sign doesn't matter
- Out << ")/*NULL*/0)";
- break;
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
- writeOperand(GV, Static);
- break;
- }
- // FALL THROUGH
- default:
-#ifndef NDEBUG
- errs() << "Unknown constant type: " << *CPV << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-// Some constant expressions need to be cast back to their original types
-// because their operands were cast to the expected type. This function takes
-// care of detecting that case and printing the cast for the ConstantExpr.
-bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
- bool NeedsExplicitCast = false;
- Type *Ty = CE->getOperand(0)->getType();
- bool TypeIsSigned = false;
- switch (CE->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::URem:
- case Instruction::UDiv: NeedsExplicitCast = true; break;
- case Instruction::AShr:
- case Instruction::SRem:
- case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
- case Instruction::SExt:
- Ty = CE->getType();
- NeedsExplicitCast = true;
- TypeIsSigned = true;
- break;
- case Instruction::ZExt:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- Ty = CE->getType();
- NeedsExplicitCast = true;
- break;
- default: break;
- }
- if (NeedsExplicitCast) {
- Out << "((";
- if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
- printSimpleType(Out, Ty, TypeIsSigned);
- else
- printType(Out, Ty); // not integer, sign doesn't matter
- Out << ")(";
- }
- return NeedsExplicitCast;
-}
-
-// Print a constant assuming that it is the operand for a given Opcode. The
-// opcodes that care about sign need to cast their operands to the expected
-// type before the operation proceeds. This function does the casting.
-void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
-
- // Extract the operand's type, we'll need it.
- Type* OpTy = CPV->getType();
-
- // Indicate whether to do the cast or not.
- bool shouldCast = false;
- bool typeIsSigned = false;
-
- // Based on the Opcode for which this Constant is being written, determine
- // the new type to which the operand should be casted by setting the value
- // of OpTy. If we change OpTy, also set shouldCast to true so it gets
- // casted below.
- switch (Opcode) {
- default:
- // for most instructions, it doesn't matter
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::UDiv:
- case Instruction::URem:
- shouldCast = true;
- break;
- case Instruction::AShr:
- case Instruction::SDiv:
- case Instruction::SRem:
- shouldCast = true;
- typeIsSigned = true;
- break;
- }
-
- // Write out the casted constant if we should, otherwise just write the
- // operand.
- if (shouldCast) {
- Out << "((";
- printSimpleType(Out, OpTy, typeIsSigned);
- Out << ")";
- printConstant(CPV, false);
- Out << ")";
- } else
- printConstant(CPV, false);
-}
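The effect of these casts is easiest to see in the emitted C: for a 32-bit add,
the operands come out wrapped roughly as

  ((unsigned int)llvm_cbe_a + (unsigned int)llvm_cbe_b)

so the addition wraps modulo 2^32 instead of overflowing a signed int
(undefined behavior in C), matching LLVM's two's-complement semantics.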
-
-std::string CWriter::GetValueName(const Value *Operand) {
-
- // Resolve potential alias.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
- if (const Value *V = GA->resolveAliasedGlobal(false))
- Operand = V;
- }
-
- // Mangle globals with the standard mangler interface for LLC compatibility.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
- SmallString<128> Str;
- Mang->getNameWithPrefix(Str, GV, false);
- return CBEMangle(Str.str().str());
- }
-
- std::string Name = Operand->getName();
-
- if (Name.empty()) { // Assign unique names to local temporaries.
- unsigned &No = AnonValueNumbers[Operand];
- if (No == 0)
- No = ++NextAnonValueNumber;
- Name = "tmp__" + utostr(No);
- }
-
- std::string VarName;
- VarName.reserve(Name.capacity());
-
- for (std::string::iterator I = Name.begin(), E = Name.end();
- I != E; ++I) {
- char ch = *I;
-
- if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
- (ch >= '0' && ch <= '9') || ch == '_')) {
- char buffer[5];
-      sprintf(buffer, "_%x_", (unsigned char)ch); // 2 hex digits max, fits buffer
- VarName += buffer;
- } else
- VarName += ch;
- }
-
- return "llvm_cbe_" + VarName;
-}
-
-/// writeInstComputationInline - Emit the computation for the specified
-/// instruction inline, with no destination provided.
-void CWriter::writeInstComputationInline(Instruction &I) {
- // We can't currently support integer types other than 1, 8, 16, 32, 64.
- // Validate this.
- Type *Ty = I.getType();
- if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
- Ty!=Type::getInt8Ty(I.getContext()) &&
- Ty!=Type::getInt16Ty(I.getContext()) &&
- Ty!=Type::getInt32Ty(I.getContext()) &&
- Ty!=Type::getInt64Ty(I.getContext()))) {
- report_fatal_error("The C backend does not currently support integer "
- "types of widths other than 1, 8, 16, 32, 64.\n"
- "This is being tracked as PR 4158.");
- }
-
- // If this is a non-trivial bool computation, make sure to truncate down to
- // a 1 bit value. This is important because we want "add i1 x, y" to return
- // "0" when x and y are true, not "2" for example.
- bool NeedBoolTrunc = false;
- if (I.getType() == Type::getInt1Ty(I.getContext()) &&
- !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
- NeedBoolTrunc = true;
-
- if (NeedBoolTrunc)
- Out << "((";
-
- visit(I);
-
- if (NeedBoolTrunc)
- Out << ")&1)";
-}
-
-
-void CWriter::writeOperandInternal(Value *Operand, bool Static) {
- if (Instruction *I = dyn_cast<Instruction>(Operand))
- // Should we inline this instruction to build a tree?
- if (isInlinableInst(*I) && !isDirectAlloca(I)) {
- Out << '(';
- writeInstComputationInline(*I);
- Out << ')';
- return;
- }
-
- Constant* CPV = dyn_cast<Constant>(Operand);
-
- if (CPV && !isa<GlobalValue>(CPV))
- printConstant(CPV, Static);
- else
- Out << GetValueName(Operand);
-}
-
-void CWriter::writeOperand(Value *Operand, bool Static) {
- bool isAddressImplicit = isAddressExposed(Operand);
- if (isAddressImplicit)
- Out << "(&"; // Global variables are referenced as their addresses by llvm
-
- writeOperandInternal(Operand, Static);
-
- if (isAddressImplicit)
- Out << ')';
-}
-
-// Some instructions need to have their result value cast back to the
-// original type because their operands were cast to the expected type.
-// This function takes care of detecting that case and printing the cast
-// for the Instruction.
-bool CWriter::writeInstructionCast(const Instruction &I) {
- Type *Ty = I.getOperand(0)->getType();
- switch (I.getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::URem:
- case Instruction::UDiv:
- Out << "((";
- printSimpleType(Out, Ty, false);
- Out << ")(";
- return true;
- case Instruction::AShr:
- case Instruction::SRem:
- case Instruction::SDiv:
- Out << "((";
- printSimpleType(Out, Ty, true);
- Out << ")(";
- return true;
- default: break;
- }
- return false;
-}
-
-// Write the operand with a cast to another type based on the Opcode being used.
-// This will be used in cases where an instruction has specific type
-// requirements (usually signedness) for its operands.
-void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
-
- // Extract the operand's type, we'll need it.
- Type* OpTy = Operand->getType();
-
- // Indicate whether to do the cast or not.
- bool shouldCast = false;
-
- // Indicate whether the cast should be to a signed type or not.
- bool castIsSigned = false;
-
- // Based on the Opcode for which this Operand is being written, determine
- // the new type to which the operand should be casted by setting the value
- // of OpTy. If we change OpTy, also set shouldCast to true.
- switch (Opcode) {
- default:
- // for most instructions, it doesn't matter
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::UDiv:
- case Instruction::URem: // Cast to unsigned first
- shouldCast = true;
- castIsSigned = false;
- break;
- case Instruction::GetElementPtr:
- case Instruction::AShr:
- case Instruction::SDiv:
- case Instruction::SRem: // Cast to signed first
- shouldCast = true;
- castIsSigned = true;
- break;
- }
-
- // Write out the casted operand if we should, otherwise just write the
- // operand.
- if (shouldCast) {
- Out << "((";
- printSimpleType(Out, OpTy, castIsSigned);
- Out << ")";
- writeOperand(Operand);
- Out << ")";
- } else
- writeOperand(Operand);
-}
-
-// Write the operand with a cast to another type based on the icmp predicate
-// being used.
-void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
-  // This has to do a cast to ensure the operand has the right signedness.
-  // Also, if the operand is a pointer, we cast it to an integer for the
-  // comparison, both for signedness and so that the C compiler doesn't
-  // optimize expressions like "p < NULL" to false (p may, for example, hold
-  // an integer value).
- bool shouldCast = Cmp.isRelational();
-
- // Write out the casted operand if we should, otherwise just write the
- // operand.
- if (!shouldCast) {
- writeOperand(Operand);
- return;
- }
-
- // Should this be a signed comparison? If so, convert to signed.
- bool castIsSigned = Cmp.isSigned();
-
- // If the operand was a pointer, convert to a large integer type.
- Type* OpTy = Operand->getType();
- if (OpTy->isPointerTy())
- OpTy = TD->getIntPtrType(Operand->getContext());
-
- Out << "((";
- printSimpleType(Out, OpTy, castIsSigned);
- Out << ")";
- writeOperand(Operand);
- Out << ")";
-}
-
-// generateCompilerSpecificCode - This is where we add conditional compilation
-// directives to cater to specific compilers as need be.
-//
-static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
- const TargetData *TD) {
- // Alloca is hard to get, and we don't want to include stdlib.h here.
- Out << "/* get a declaration for alloca */\n"
- << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
- << "#define alloca(x) __builtin_alloca((x))\n"
- << "#define _alloca(x) __builtin_alloca((x))\n"
- << "#elif defined(__APPLE__)\n"
- << "extern void *__builtin_alloca(unsigned long);\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#define longjmp _longjmp\n"
- << "#define setjmp _setjmp\n"
- << "#elif defined(__sun__)\n"
- << "#if defined(__sparcv9)\n"
- << "extern void *__builtin_alloca(unsigned long);\n"
- << "#else\n"
- << "extern void *__builtin_alloca(unsigned int);\n"
- << "#endif\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#elif defined(_MSC_VER)\n"
- << "#define inline _inline\n"
- << "#define alloca(x) _alloca(x)\n"
- << "#else\n"
- << "#include <alloca.h>\n"
- << "#endif\n\n";
-
- // We output GCC specific attributes to preserve 'linkonce'ness on globals.
- // If we aren't being compiled with GCC, just drop these attributes.
- Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
- << "#define __attribute__(X)\n"
- << "#endif\n\n";
-
- // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
- Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
- << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
- << "#elif defined(__GNUC__)\n"
- << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
- << "#else\n"
- << "#define __EXTERNAL_WEAK__\n"
- << "#endif\n\n";
-
- // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
- Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
- << "#define __ATTRIBUTE_WEAK__\n"
- << "#elif defined(__GNUC__)\n"
- << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
- << "#else\n"
- << "#define __ATTRIBUTE_WEAK__\n"
- << "#endif\n\n";
-
- // Add hidden visibility support. FIXME: APPLE_CC?
- Out << "#if defined(__GNUC__)\n"
- << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
- << "#endif\n\n";
-
- // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
- // From the GCC documentation:
- //
- // double __builtin_nan (const char *str)
- //
- // This is an implementation of the ISO C99 function nan.
- //
- // Since ISO C99 defines this function in terms of strtod, which we do
- // not implement, a description of the parsing is in order. The string is
- // parsed as by strtol; that is, the base is recognized by leading 0 or
- // 0x prefixes. The number parsed is placed in the significand such that
- // the least significant bit of the number is at the least significant
- // bit of the significand. The number is truncated to fit the significand
- // field provided. The significand is forced to be a quiet NaN.
- //
- // This function, if given a string literal, is evaluated early enough
- // that it is considered a compile-time constant.
- //
- // float __builtin_nanf (const char *str)
- //
- // Similar to __builtin_nan, except the return type is float.
- //
- // double __builtin_inf (void)
- //
- // Similar to __builtin_huge_val, except a warning is generated if the
- // target floating-point format does not support infinities. This
- // function is suitable for implementing the ISO C99 macro INFINITY.
- //
- // float __builtin_inff (void)
- //
- // Similar to __builtin_inf, except the return type is float.
- Out << "#ifdef __GNUC__\n"
- << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
- << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
- << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
- << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
- << "#define LLVM_INF __builtin_inf() /* Double */\n"
- << "#define LLVM_INFF __builtin_inff() /* Float */\n"
- << "#define LLVM_PREFETCH(addr,rw,locality) "
- "__builtin_prefetch(addr,rw,locality)\n"
- << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
- << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
- << "#define LLVM_ASM __asm__\n"
- << "#else\n"
- << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
- << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
- << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
- << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
- << "#define LLVM_INF ((double)0.0) /* Double */\n"
- << "#define LLVM_INFF 0.0F /* Float */\n"
- << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
- << "#define __ATTRIBUTE_CTOR__\n"
- << "#define __ATTRIBUTE_DTOR__\n"
- << "#define LLVM_ASM(X)\n"
- << "#endif\n\n";
-
- Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
- << "#define __builtin_stack_save() 0 /* not implemented */\n"
- << "#define __builtin_stack_restore(X) /* noop */\n"
- << "#endif\n\n";
-
- // Output typedefs for 128-bit integers. If these are needed with a
- // 32-bit target or with a C compiler that doesn't support mode(TI),
- // more drastic measures will be needed.
- Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
- << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
- << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
- << "#endif\n\n";
-
- // Output target-specific code that should be inserted into main.
- Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
-}
-
-/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
-/// the StaticTors set.
-static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return;
-
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
- if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
- if (CS->getOperand(1)->isNullValue())
- return; // Found a null terminator, exit printing.
- Constant *FP = CS->getOperand(1);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
- if (CE->isCast())
- FP = CE->getOperand(0);
- if (Function *F = dyn_cast<Function>(FP))
- StaticTors.insert(F);
- }
-}
-
-enum SpecialGlobalClass {
- NotSpecial = 0,
- GlobalCtors, GlobalDtors,
- NotPrinted
-};
-
-/// getGlobalVariableClass - If this is a global that is specially recognized
-/// by LLVM, return a code that indicates how we should handle it.
-static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
- // If this is a global ctors/dtors list, handle it now.
- if (GV->hasAppendingLinkage() && GV->use_empty()) {
- if (GV->getName() == "llvm.global_ctors")
- return GlobalCtors;
- else if (GV->getName() == "llvm.global_dtors")
- return GlobalDtors;
- }
-
- // Otherwise, if it is other metadata, don't print it. This catches things
- // like debug information.
- if (GV->getSection() == "llvm.metadata")
- return NotPrinted;
-
- return NotSpecial;
-}
-
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const char *Str, unsigned Length,
- raw_ostream &Out) {
- for (unsigned i = 0; i != Length; ++i) {
- unsigned char C = Str[i];
- if (isprint(C) && C != '\\' && C != '"')
- Out << C;
- else if (C == '\\')
- Out << "\\\\";
- else if (C == '\"')
- Out << "\\\"";
- else if (C == '\t')
- Out << "\\t";
- else
- Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
- }
-}
-
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
- PrintEscapedString(Str.c_str(), Str.size(), Out);
-}
-
-bool CWriter::doInitialization(Module &M) {
- FunctionPass::doInitialization(M);
-
- // Initialize
- TheModule = &M;
-
- TD = new TargetData(&M);
- IL = new IntrinsicLowering(*TD);
- IL->AddPrototypes(M);
-
-#if 0
- std::string Triple = TheModule->getTargetTriple();
- if (Triple.empty())
- Triple = llvm::sys::getHostTriple();
-
- std::string E;
- if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TAsm = Match->createMCAsmInfo(Triple);
-#endif
- TAsm = new CBEMCAsmInfo();
- MRI = new MCRegisterInfo();
- TCtx = new MCContext(*TAsm, *MRI, NULL);
- Mang = new Mangler(*TCtx, *TD);
-
- // Keep track of which functions are static ctors/dtors so they can have
- // an attribute added to their prototypes.
- std::set<Function*> StaticCtors, StaticDtors;
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- switch (getGlobalVariableClass(I)) {
- default: break;
- case GlobalCtors:
- FindStaticTors(I, StaticCtors);
- break;
- case GlobalDtors:
- FindStaticTors(I, StaticDtors);
- break;
- }
- }
-
- // Emit standard includes and the compiler-specific support code (which also
- // provides the declaration for alloca).
- Out << "/* Provide Declarations */\n";
- Out << "#include <stdarg.h>\n"; // Varargs support
- Out << "#include <setjmp.h>\n"; // Unwind support
- Out << "#include <limits.h>\n"; // With overflow intrinsics support.
- generateCompilerSpecificCode(Out, TD);
-
- // Provide a definition for `bool' if not compiling with a C++ compiler.
- Out << "\n"
- << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
-
- << "\n\n/* Support for floating point constants */\n"
- << "typedef unsigned long long ConstantDoubleTy;\n"
- << "typedef unsigned int ConstantFloatTy;\n"
- << "typedef struct { unsigned long long f1; unsigned short f2; "
- "unsigned short pad[3]; } ConstantFP80Ty;\n"
- // This is used for both kinds of 128-bit long double; meaning differs.
- << "typedef struct { unsigned long long f1; unsigned long long f2; }"
- " ConstantFP128Ty;\n"
- << "\n\n/* Global Declarations */\n";
-
- // First output all the declarations for the program, because C requires
- // Functions & globals to be declared before they are used.
- //
- if (!M.getModuleInlineAsm().empty()) {
- Out << "/* Module asm statements */\n"
- << "asm(";
-
- // Split the string into lines, to make it easier to read the .ll file.
- std::string Asm = M.getModuleInlineAsm();
- size_t CurPos = 0;
- size_t NewLine = Asm.find_first_of('\n', CurPos);
- while (NewLine != std::string::npos) {
- // We found a newline, print the portion of the asm string from the
- // last newline up to this newline.
- Out << "\"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
- Out);
- Out << "\\n\"\n";
- CurPos = NewLine+1;
- NewLine = Asm.find_first_of('\n', CurPos);
- }
- Out << "\"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
- Out << "\");\n"
- << "/* End Module asm statements */\n";
- }
-
- // Loop over the symbol table, emitting all named constants.
- printModuleTypes();
-
- // Global variable declarations...
- if (!M.global_empty()) {
- Out << "\n/* External Global Variable Declarations */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
-
- if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
- I->hasCommonLinkage())
- Out << "extern ";
- else if (I->hasDLLImportLinkage())
- Out << "__declspec(dllimport) ";
- else
- continue; // Internal Global
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false, GetValueName(I));
-
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- Out << ";\n";
- }
- }
-
- // Function declarations
- Out << "\n/* Function Declarations */\n";
- Out << "double fmod(double, double);\n"; // Support for FP rem
- Out << "float fmodf(float, float);\n";
- Out << "long double fmodl(long double, long double);\n";
-
- // Store the intrinsics which will be declared/defined below.
- SmallVector<const Function*, 8> intrinsicsToDefine;
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- // Don't print declarations for intrinsic functions.
- // Store the used intrinsics, which need to be explicitly defined.
- if (I->isIntrinsic()) {
- switch (I->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- intrinsicsToDefine.push_back(I);
- break;
- }
- continue;
- }
-
- if (I->getName() == "setjmp" ||
- I->getName() == "longjmp" || I->getName() == "_setjmp")
- continue;
-
- if (I->hasExternalWeakLinkage())
- Out << "extern ";
- printFunctionSignature(I, true);
- if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (StaticCtors.count(I))
- Out << " __ATTRIBUTE_CTOR__";
- if (StaticDtors.count(I))
- Out << " __ATTRIBUTE_DTOR__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- if (I->hasName() && I->getName()[0] == 1)
- Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-
- Out << ";\n";
- }
-
- // Output the global variable declarations
- if (!M.global_empty()) {
- Out << "\n\n/* Global Variable Declarations */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- // Ignore special globals, such as debug info.
- if (getGlobalVariableClass(I))
- continue;
-
- if (I->hasLocalLinkage())
- Out << "static ";
- else
- Out << "extern ";
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false,
- GetValueName(I));
-
- if (I->hasLinkOnceLinkage())
- Out << " __attribute__((common))";
- else if (I->hasCommonLinkage()) // FIXME is this right?
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasWeakLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
- Out << ";\n";
- }
- }
-
- // Output the global variable definitions and contents...
- if (!M.global_empty()) {
- Out << "\n\n/* Global Variable Definitions and Initialization */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- // Ignore special globals, such as debug info.
- if (getGlobalVariableClass(I))
- continue;
-
- if (I->hasLocalLinkage())
- Out << "static ";
- else if (I->hasDLLImportLinkage())
- Out << "__declspec(dllimport) ";
- else if (I->hasDLLExportLinkage())
- Out << "__declspec(dllexport) ";
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false,
- GetValueName(I));
- if (I->hasLinkOnceLinkage())
- Out << " __attribute__((common))";
- else if (I->hasWeakLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasCommonLinkage())
- Out << " __ATTRIBUTE_WEAK__";
-
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- // If the initializer is not null, emit the initializer. If it is null,
- // we try to avoid emitting large amounts of zeros. The problem with
- // this, however, occurs when the variable has weak linkage. In this
- // case, the assembler will complain about the variable being both weak
- // and common, so we disable this optimization.
- // FIXME common linkage should avoid this problem.
- if (!I->getInitializer()->isNullValue()) {
- Out << " = " ;
- writeOperand(I->getInitializer(), true);
- } else if (I->hasWeakLinkage()) {
- // We have to specify an initializer, but it doesn't have to be
- // complete. If the value is an aggregate, print out { 0 }, and let
- // the compiler figure out the rest of the zeros.
- Out << " = " ;
- if (I->getInitializer()->getType()->isStructTy() ||
- I->getInitializer()->getType()->isVectorTy()) {
- Out << "{ 0 }";
- } else if (I->getInitializer()->getType()->isArrayTy()) {
- // As with structs and vectors, but with an extra set of braces
- // because arrays are wrapped in structs.
- Out << "{ { 0 } }";
- } else {
- // Just print it out normally.
- writeOperand(I->getInitializer(), true);
- }
- }
- Out << ";\n";
- }
- }
-
- if (!M.empty())
- Out << "\n\n/* Function Bodies */\n";
-
- // Emit some helper functions for dealing with FCMP instruction's
- // predicates
- Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
- Out << "return X == X && Y == Y; }\n";
- Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
- Out << "return X != X || Y != Y; }\n";
- Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
- Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
- Out << "return X != Y; }\n";
- Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
- Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
- Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
- Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
- Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
- Out << "return X == Y ; }\n";
- Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
- Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
- Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
- Out << "return X < Y ; }\n";
- Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
- Out << "return X > Y ; }\n";
- Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
- Out << "return X <= Y ; }\n";
- Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
- Out << "return X >= Y ; }\n";
-
- // Emit definitions of the intrinsics.
- for (SmallVector<const Function*, 8>::const_iterator
- I = intrinsicsToDefine.begin(),
- E = intrinsicsToDefine.end(); I != E; ++I) {
- printIntrinsicDefinition(**I, Out);
- }
-
- return false;
-}
-
-
-/// Output all floating point constants that cannot be printed accurately...
-void CWriter::printFloatingPointConstants(Function &F) {
- // Scan the module for floating point constants. If any FP constant is used
- // in the function, we want to redirect it here so that we do not depend on
- // the precision of the printed form, unless the printed form preserves
- // precision.
- //
- for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
- I != E; ++I)
- printFloatingPointConstants(*I);
-
- Out << '\n';
-}
-
-void CWriter::printFloatingPointConstants(const Constant *C) {
- // If this is a constant expression, recursively check for constant fp values.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- printFloatingPointConstants(CE->getOperand(i));
- return;
- }
-
- // Otherwise, check for a FP constant that we need to print.
- const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
- if (FPC == 0 ||
- // Do not put in FPConstantMap if safe.
- isFPCSafeToPrint(FPC) ||
- // Already printed this constant?
- FPConstantMap.count(FPC))
- return;
-
- FPConstantMap[FPC] = FPCounter; // Number the FP constants
-
- if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
- double Val = FPC->getValueAPF().convertToDouble();
- uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
- Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
- << " = 0x" << utohexstr(i)
- << "ULL; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
- float Val = FPC->getValueAPF().convertToFloat();
- uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
- getZExtValue();
- Out << "static const ConstantFloatTy FPConstant" << FPCounter++
- << " = 0x" << utohexstr(i)
- << "U; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
- // api needed to prevent premature destruction
- APInt api = FPC->getValueAPF().bitcastToAPInt();
- const uint64_t *p = api.getRawData();
- Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
- << " = { 0x" << utohexstr(p[0])
- << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
- << "}; /* Long double constant */\n";
- } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
- FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
- APInt api = FPC->getValueAPF().bitcastToAPInt();
- const uint64_t *p = api.getRawData();
- Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
- << " = { 0x"
- << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
- << "}; /* Long double constant */\n";
-
- } else {
- llvm_unreachable("Unknown float type!");
- }
-}
-
-
-/// printModuleTypes - Emit the helper bitcast union, then forward-declare and
-/// define every struct type used in the module.
-///
-void CWriter::printModuleTypes() {
- Out << "/* Helper union for bitcasts */\n";
- Out << "typedef union {\n";
- Out << " unsigned int Int32;\n";
- Out << " unsigned long long Int64;\n";
- Out << " float Float;\n";
- Out << " double Double;\n";
- Out << "} llvmBitCastUnion;\n";
-
- // Get all of the struct types used in the module.
- std::vector<StructType*> StructTypes;
- TheModule->findUsedStructTypes(StructTypes);
-
- if (StructTypes.empty()) return;
-
- Out << "/* Structure forward decls */\n";
-
- unsigned NextTypeID = 0;
-
- // If any of them are missing names, add a unique ID to UnnamedStructIDs.
- // Print out forward declarations for structure types.
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
- StructType *ST = StructTypes[i];
-
- if (ST->isLiteral() || ST->getName().empty())
- UnnamedStructIDs[ST] = NextTypeID++;
-
- std::string Name = getStructName(ST);
-
- Out << "typedef struct " << Name << ' ' << Name << ";\n";
- }
-
- Out << '\n';
-
- // Keep track of which structures have been printed so far.
- SmallPtrSet<Type *, 16> StructPrinted;
-
- // Loop over all structures, printing the types each one contains first so
- // that every definition appears after the structs it depends on.
- //
- Out << "/* Structure contents */\n";
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
- if (StructTypes[i]->isStructTy())
- // Only print out used types!
- printContainedStructs(StructTypes[i], StructPrinted);
-}
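-
-// Rough sketch of what printModuleTypes() produces (the struct names below are
-// only illustrative; the real ones come from getStructName()): a module using
-// one named struct would generate something like
-//
-//   /* Structure forward decls */
-//   typedef struct l_struct_foo l_struct_foo;
-//
-//   /* Structure contents */
-//   struct l_struct_foo { unsigned int field0; float field1; };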
-
-// Recursively print the definitions of all struct types this one depends on,
-// then print this struct itself.
-//
-// TODO: Make this work properly with vector types
-//
-void CWriter::printContainedStructs(Type *Ty,
- SmallPtrSet<Type *, 16> &StructPrinted) {
- // Don't walk through pointers.
- if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
- return;
-
- // Print all contained types first.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- printContainedStructs(*I, StructPrinted);
-
- if (StructType *ST = dyn_cast<StructType>(Ty)) {
- // Check to see if we have already printed this struct.
- if (!StructPrinted.insert(Ty)) return;
-
- // Print structure type out.
- printType(Out, ST, false, getStructName(ST), true);
- Out << ";\n\n";
- }
-}
-
-void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
- /// isStructReturn - Should this function actually return a struct by-value?
- bool isStructReturn = F->hasStructRetAttr();
-
- if (F->hasLocalLinkage()) Out << "static ";
- if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
- if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
- switch (F->getCallingConv()) {
- case CallingConv::X86_StdCall:
- Out << "__attribute__((stdcall)) ";
- break;
- case CallingConv::X86_FastCall:
- Out << "__attribute__((fastcall)) ";
- break;
- case CallingConv::X86_ThisCall:
- Out << "__attribute__((thiscall)) ";
- break;
- default:
- break;
- }
-
- // Loop over the arguments, printing them...
- FunctionType *FT = cast<FunctionType>(F->getFunctionType());
- const AttrListPtr &PAL = F->getAttributes();
-
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
-
- // Print out the name...
- FunctionInnards << GetValueName(F) << '(';
-
- bool PrintedArg = false;
- if (!F->isDeclaration()) {
- if (!F->arg_empty()) {
- Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- unsigned Idx = 1;
-
- // If this is a struct-return function, don't print the hidden
- // struct-return argument.
- if (isStructReturn) {
- assert(I != E && "Invalid struct return function!");
- ++I;
- ++Idx;
- }
-
- std::string ArgName;
- for (; I != E; ++I) {
- if (PrintedArg) FunctionInnards << ", ";
- if (I->hasName() || !Prototype)
- ArgName = GetValueName(I);
- else
- ArgName = "";
- Type *ArgTy = I->getType();
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- ByValParams.insert(I);
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
- ArgName);
- PrintedArg = true;
- ++Idx;
- }
- }
- } else {
- // Loop over the arguments, printing them.
- FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
- unsigned Idx = 1;
-
- // If this is a struct-return function, don't print the hidden
- // struct-return argument.
- if (isStructReturn) {
- assert(I != E && "Invalid struct return function!");
- ++I;
- ++Idx;
- }
-
- for (; I != E; ++I) {
- if (PrintedArg) FunctionInnards << ", ";
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
- PrintedArg = true;
- ++Idx;
- }
- }
-
- if (!PrintedArg && FT->isVarArg()) {
- FunctionInnards << "int vararg_dummy_arg";
- PrintedArg = true;
- }
-
- // Finish printing arguments... if this is a vararg function, print the ...,
- // unless there are no known types, in which case, we just emit ().
- //
- if (FT->isVarArg() && PrintedArg) {
- FunctionInnards << ",..."; // Output varargs portion of signature!
- } else if (!FT->isVarArg() && !PrintedArg) {
- FunctionInnards << "void"; // ret() -> ret(void) in C.
- }
- FunctionInnards << ')';
-
- // Get the return type for the function.
- Type *RetTy;
- if (!isStructReturn)
- RetTy = F->getReturnType();
- else {
- // If this is a struct-return function, print the struct-return type.
- RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
- }
-
- // Print out the return type and the signature built above.
- printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
- FunctionInnards.str());
-}
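-
-// Rough sketch of how the struct-return handling above shows up in the output
-// (names are illustrative only): an LLVM declaration such as
-//   declare void @f(%struct.S* sret, i32)
-// is printed with the hidden sret parameter folded back into the return type,
-// roughly as
-//   struct l_struct_S f(unsigned int);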
-
-static inline bool isFPIntBitCast(const Instruction &I) {
- if (!isa<BitCastInst>(I))
- return false;
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DstTy = I.getType();
- return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
- (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
-}
-
-void CWriter::printFunction(Function &F) {
- /// isStructReturn - Should this function actually return a struct by-value?
- bool isStructReturn = F.hasStructRetAttr();
-
- printFunctionSignature(&F, false);
- Out << " {\n";
-
- // If this is a struct return function, handle the result with magic.
- if (isStructReturn) {
- Type *StructTy =
- cast<PointerType>(F.arg_begin()->getType())->getElementType();
- Out << " ";
- printType(Out, StructTy, false, "StructReturn");
- Out << "; /* Struct return temporary */\n";
-
- Out << " ";
- printType(Out, F.arg_begin()->getType(), false,
- GetValueName(F.arg_begin()));
- Out << " = &StructReturn;\n";
- }
-
- bool PrintedVar = false;
-
- // print local variable information for the function
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- if (const AllocaInst *AI = isDirectAlloca(&*I)) {
- Out << " ";
- printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
- Out << "; /* Address-exposed local */\n";
- PrintedVar = true;
- } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
- !isInlinableInst(*I)) {
- Out << " ";
- printType(Out, I->getType(), false, GetValueName(&*I));
- Out << ";\n";
-
- if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
- Out << " ";
- printType(Out, I->getType(), false,
- GetValueName(&*I)+"__PHI_TEMPORARY");
- Out << ";\n";
- }
- PrintedVar = true;
- }
- // We need a temporary for the BitCast to use so it can pluck a value out
- // of a union to do the BitCast. This is separate from the need for a
- // variable to hold the result of the BitCast.
- if (isFPIntBitCast(*I)) {
- Out << " llvmBitCastUnion " << GetValueName(&*I)
- << "__BITCAST_TEMPORARY;\n";
- PrintedVar = true;
- }
- }
-
- if (PrintedVar)
- Out << '\n';
-
- if (F.hasExternalLinkage() && F.getName() == "main")
- Out << " CODE_FOR_MAIN();\n";
-
- // print the basic blocks
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (L->getHeader() == BB && L->getParentLoop() == 0)
- printLoop(L);
- } else {
- printBasicBlock(BB);
- }
- }
-
- Out << "}\n\n";
-}
-
-void CWriter::printLoop(Loop *L) {
- Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
- << "' to make GCC happy */\n";
- for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
- BasicBlock *BB = L->getBlocks()[i];
- Loop *BBLoop = LI->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
- printLoop(BBLoop);
- }
- Out << " } while (1); /* end of syntactic loop '"
- << L->getHeader()->getName() << "' */\n";
-}
-
-void CWriter::printBasicBlock(BasicBlock *BB) {
-
- // Don't print a label for the basic block unless some predecessor actually
- // needs a goto to reach it. We scan the predecessors rather than the use
- // list because PHI nodes also reference basic blocks without requiring a
- // label to be generated.
- //
- bool NeedsLabel = false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (isGotoCodeNecessary(*PI, BB)) {
- NeedsLabel = true;
- break;
- }
-
- if (NeedsLabel) Out << GetValueName(BB) << ":\n";
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
- ++II) {
- if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
- if (II->getType() != Type::getVoidTy(BB->getContext()) &&
- !isInlineAsm(*II))
- outputLValue(II);
- else
- Out << " ";
- writeInstComputationInline(*II);
- Out << ";\n";
- }
- }
-
- // Don't emit prefix or suffix for the terminator.
- visit(*BB->getTerminator());
-}
-
-
-// Specific Instruction type classes... note that all of the casts are
-// necessary because we use the instruction classes as opaque types...
-//
-void CWriter::visitReturnInst(ReturnInst &I) {
- // If this is a struct return function, return the temporary struct.
- bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
-
- if (isStructReturn) {
- Out << " return StructReturn;\n";
- return;
- }
-
- // Don't output a void return if this is the last basic block in the function
- if (I.getNumOperands() == 0 &&
- &*--I.getParent()->getParent()->end() == I.getParent() &&
- I.getParent()->size() != 1) {
- return;
- }
-
- Out << " return";
- if (I.getNumOperands()) {
- Out << ' ';
- writeOperand(I.getOperand(0));
- }
- Out << ";\n";
-}
-
-void CWriter::visitSwitchInst(SwitchInst &SI) {
-
- Value* Cond = SI.getCondition();
-
- Out << " switch (";
- writeOperand(Cond);
- Out << ") {\n default:\n";
- printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
- printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
- Out << ";\n";
-
- unsigned NumCases = SI.getNumCases();
- // Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
- BasicBlock* Succ = SI.getSuccessor(i);
- Out << " case ";
- writeOperand(CaseVal);
- Out << ":\n";
- printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
- printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
- Out << " break;\n";
- }
-
- Out << " }\n";
-}
-
-void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
- Out << " goto *(void*)(";
- writeOperand(IBI.getOperand(0));
- Out << ");\n";
-}
-
-void CWriter::visitUnreachableInst(UnreachableInst &I) {
- Out << " /*UNREACHABLE*/;\n";
-}
-
-bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
- /// FIXME: This should be re-enabled, but only once it is safe with respect
- /// to loop reordering.
- return true;
-
- if (llvm::next(Function::iterator(From)) != Function::iterator(To))
- return true; // Not the direct successor, we need a goto.
-
- //isa<SwitchInst>(From->getTerminator())
-
- if (LI->getLoopFor(From) != LI->getLoopFor(To))
- return true;
- return false;
-}
-
-void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
- BasicBlock *Successor,
- unsigned Indent) {
- for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- // Now we have to do the printing.
- Value *IV = PN->getIncomingValueForBlock(CurBlock);
- if (!isa<UndefValue>(IV)) {
- Out << std::string(Indent, ' ');
- Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
- writeOperand(IV);
- Out << "; /* for PHI node */\n";
- }
- }
-}
-
-void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
- unsigned Indent) {
- if (isGotoCodeNecessary(CurBB, Succ)) {
- Out << std::string(Indent, ' ') << " goto ";
- writeOperand(Succ);
- Out << ";\n";
- }
-}
-
-// Branch instruction printing - Avoid printing out a branch to a basic block
-// that immediately succeeds the current one.
-//
-void CWriter::visitBranchInst(BranchInst &I) {
-
- if (I.isConditional()) {
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
- Out << " if (";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
-
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
- Out << " } else {\n";
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
- } else {
- // First goto not necessary, assume second one is...
- Out << " if (!";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
-
- Out << " }\n";
- } else {
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
- }
- Out << "\n";
-}
-
-// PHI nodes get copied into temporary values at the end of predecessor basic
-// blocks. We now need to copy these temporary values into the REAL value for
-// the PHI.
-void CWriter::visitPHINode(PHINode &I) {
- writeOperand(&I);
- Out << "__PHI_TEMPORARY";
-}
-
-
-void CWriter::visitBinaryOperator(Instruction &I) {
- // binary instructions, shift instructions, setCond instructions.
- assert(!I.getType()->isPointerTy());
-
- // We must cast the results of binary operations which might be promoted.
- bool needsCast = false;
- if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
- (I.getType() == Type::getInt16Ty(I.getContext()))
- || (I.getType() == Type::getFloatTy(I.getContext()))) {
- needsCast = true;
- Out << "((";
- printType(Out, I.getType(), false);
- Out << ")(";
- }
-
- // If this is a negation operation, print it out as such. For FP, we don't
- // want to print "-0.0 - X".
- if (BinaryOperator::isNeg(&I)) {
- Out << "-(";
- writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
- Out << ")";
- } else if (BinaryOperator::isFNeg(&I)) {
- Out << "-(";
- writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
- Out << ")";
- } else if (I.getOpcode() == Instruction::FRem) {
- // Output a call to fmod/fmodf instead of emitting a%b
- if (I.getType() == Type::getFloatTy(I.getContext()))
- Out << "fmodf(";
- else if (I.getType() == Type::getDoubleTy(I.getContext()))
- Out << "fmod(";
- else // all 3 flavors of long double
- Out << "fmodl(";
- writeOperand(I.getOperand(0));
- Out << ", ";
- writeOperand(I.getOperand(1));
- Out << ")";
- } else {
-
- // Write out the cast of the instruction's value back to the proper type
- // if necessary.
- bool NeedsClosingParens = writeInstructionCast(I);
-
- // Certain instructions require the operand to be forced to a specific type
- // so we use writeOperandWithCast here instead of writeOperand. Similarly
- // below for operand 1
- writeOperandWithCast(I.getOperand(0), I.getOpcode());
-
- switch (I.getOpcode()) {
- case Instruction::Add:
- case Instruction::FAdd: Out << " + "; break;
- case Instruction::Sub:
- case Instruction::FSub: Out << " - "; break;
- case Instruction::Mul:
- case Instruction::FMul: Out << " * "; break;
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem: Out << " % "; break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv: Out << " / "; break;
- case Instruction::And: Out << " & "; break;
- case Instruction::Or: Out << " | "; break;
- case Instruction::Xor: Out << " ^ "; break;
- case Instruction::Shl : Out << " << "; break;
- case Instruction::LShr:
- case Instruction::AShr: Out << " >> "; break;
- default:
-#ifndef NDEBUG
- errs() << "Invalid operator type!" << I;
-#endif
- llvm_unreachable(0);
- }
-
- writeOperandWithCast(I.getOperand(1), I.getOpcode());
- if (NeedsClosingParens)
- Out << "))";
- }
-
- if (needsCast) {
- Out << "))";
- }
-}
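-
-// Rough sketch of the promotion cast emitted above (value names are
-// illustrative): because C's integer promotions would widen the result, an i8
-// add such as '%t = add i8 %a, %b' comes out roughly as
-//   ((unsigned char)(llvm_cbe_a + llvm_cbe_b))
-// and float results get the analogous cast back down from double.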
-
-void CWriter::visitICmpInst(ICmpInst &I) {
- // We must cast the results of icmp which might be promoted.
- bool needsCast = false;
-
- // Write out the cast of the instruction's value back to the proper type
- // if necessary.
- bool NeedsClosingParens = writeInstructionCast(I);
-
- // Certain icmp predicates require the operand to be forced to a specific type
- // so we use writeOperandWithCast here instead of writeOperand. Similarly
- // below for operand 1
- writeOperandWithCast(I.getOperand(0), I);
-
- switch (I.getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << " == "; break;
- case ICmpInst::ICMP_NE: Out << " != "; break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE: Out << " <= "; break;
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE: Out << " >= "; break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: Out << " < "; break;
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT: Out << " > "; break;
- default:
-#ifndef NDEBUG
- errs() << "Invalid icmp predicate!" << I;
-#endif
- llvm_unreachable(0);
- }
-
- writeOperandWithCast(I.getOperand(1), I);
- if (NeedsClosingParens)
- Out << "))";
-
- if (needsCast) {
- Out << "))";
- }
-}
-
-void CWriter::visitFCmpInst(FCmpInst &I) {
- if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
- Out << "0";
- return;
- }
- if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
- Out << "1";
- return;
- }
-
- const char* op = 0;
- switch (I.getPredicate()) {
- default: llvm_unreachable("Illegal FCmp predicate");
- case FCmpInst::FCMP_ORD: op = "ord"; break;
- case FCmpInst::FCMP_UNO: op = "uno"; break;
- case FCmpInst::FCMP_UEQ: op = "ueq"; break;
- case FCmpInst::FCMP_UNE: op = "une"; break;
- case FCmpInst::FCMP_ULT: op = "ult"; break;
- case FCmpInst::FCMP_ULE: op = "ule"; break;
- case FCmpInst::FCMP_UGT: op = "ugt"; break;
- case FCmpInst::FCMP_UGE: op = "uge"; break;
- case FCmpInst::FCMP_OEQ: op = "oeq"; break;
- case FCmpInst::FCMP_ONE: op = "one"; break;
- case FCmpInst::FCMP_OLT: op = "olt"; break;
- case FCmpInst::FCMP_OLE: op = "ole"; break;
- case FCmpInst::FCMP_OGT: op = "ogt"; break;
- case FCmpInst::FCMP_OGE: op = "oge"; break;
- }
-
- Out << "llvm_fcmp_" << op << "(";
- // Write the first operand
- writeOperand(I.getOperand(0));
- Out << ", ";
- // Write the second operand
- writeOperand(I.getOperand(1));
- Out << ")";
-}
-
-static const char * getFloatBitCastField(Type *Ty) {
- switch (Ty->getTypeID()) {
- default: llvm_unreachable("Invalid Type");
- case Type::FloatTyID: return "Float";
- case Type::DoubleTyID: return "Double";
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits <= 32)
- return "Int32";
- else
- return "Int64";
- }
- }
-}
-
-void CWriter::visitCastInst(CastInst &I) {
- Type *DstTy = I.getType();
- Type *SrcTy = I.getOperand(0)->getType();
- if (isFPIntBitCast(I)) {
- Out << '(';
- // These int<->float and long<->double casts need to be handled specially
- Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
- << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
- writeOperand(I.getOperand(0));
- Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
- << getFloatBitCastField(I.getType());
- Out << ')';
- return;
- }
-
- Out << '(';
- printCast(I.getOpcode(), SrcTy, DstTy);
-
- // Make a sext from i1 work by subtracting the i1 from 0 (an int).
- if (SrcTy == Type::getInt1Ty(I.getContext()) &&
- I.getOpcode() == Instruction::SExt)
- Out << "0-";
-
- writeOperand(I.getOperand(0));
-
- if (DstTy == Type::getInt1Ty(I.getContext()) &&
- (I.getOpcode() == Instruction::Trunc ||
- I.getOpcode() == Instruction::FPToUI ||
- I.getOpcode() == Instruction::FPToSI ||
- I.getOpcode() == Instruction::PtrToInt)) {
- // Make sure we really get a trunc to bool by anding the operand with 1
- Out << "&1u";
- }
- Out << ')';
-}
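-
-// Rough sketch of the FP<->int bitcast path above (value names are
-// illustrative): '%y = bitcast float %x to i32' is routed through the
-// per-instruction llvmBitCastUnion temporary declared in printFunction(),
-// roughly as
-//   (llvm_cbe_y__BITCAST_TEMPORARY.Float = llvm_cbe_x,
-//    llvm_cbe_y__BITCAST_TEMPORARY.Int32)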
-
-void CWriter::visitSelectInst(SelectInst &I) {
- Out << "((";
- writeOperand(I.getCondition());
- Out << ") ? (";
- writeOperand(I.getTrueValue());
- Out << ") : (";
- writeOperand(I.getFalseValue());
- Out << "))";
-}
-
-// Prints the limits.h macro name (or value) for the maximum or minimum of an
-// integer type.
-static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax,
- raw_ostream &Out) {
- const char* type;
- const char* sprefix = "";
-
- unsigned NumBits = Ty.getBitWidth();
- if (NumBits <= 8) {
- type = "CHAR";
- sprefix = "S";
- } else if (NumBits <= 16) {
- type = "SHRT";
- } else if (NumBits <= 32) {
- type = "INT";
- } else if (NumBits <= 64) {
- type = "LLONG";
- } else {
- llvm_unreachable("Bit widths > 64 not implemented yet");
- }
-
- if (isSigned)
- Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
- else
- Out << "U" << type << (isMax ? "_MAX" : "0");
-}
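-
-// For example, a signed i32 maximum prints as INT_MAX, a signed i8 minimum as
-// SCHAR_MIN, and an unsigned i64 maximum as ULLONG_MAX.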
-
-#ifndef NDEBUG
-static bool isSupportedIntegerSize(IntegerType &T) {
- return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
- T.getBitWidth() == 32 || T.getBitWidth() == 64;
-}
-#endif
-
-void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
- FunctionType *funT = F.getFunctionType();
- Type *retT = F.getReturnType();
- IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
-
- assert(isSupportedIntegerSize(*elemT) &&
- "CBackend does not support arbitrary size integers.");
- assert(cast<StructType>(retT)->getElementType(0) == elemT &&
- elemT == funT->getParamType(0) && funT->getNumParams() == 2);
-
- switch (F.getIntrinsicID()) {
- default:
- llvm_unreachable("Unsupported Intrinsic.");
- case Intrinsic::uadd_with_overflow:
- // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
- // Rty r;
- // r.field0 = a + b;
- // r.field1 = (r.field0 < a);
- // return r;
- // }
- Out << "static inline ";
- printType(Out, retT);
- Out << GetValueName(&F);
- Out << "(";
- printSimpleType(Out, elemT, false);
- Out << "a,";
- printSimpleType(Out, elemT, false);
- Out << "b) {\n ";
- printType(Out, retT);
- Out << "r;\n";
- Out << " r.field0 = a + b;\n";
- Out << " r.field1 = (r.field0 < a);\n";
- Out << " return r;\n}\n";
- break;
-
- case Intrinsic::sadd_with_overflow:
- // static inline Rty sadd_ixx(ixx a, ixx b) {
- // Rty r;
- // r.field1 = (b > 0 && a > XX_MAX - b) ||
- // (b < 0 && a < XX_MIN - b);
- // r.field0 = r.field1 ? 0 : a + b;
- // return r;
- // }
- Out << "static ";
- printType(Out, retT);
- Out << GetValueName(&F);
- Out << "(";
- printSimpleType(Out, elemT, true);
- Out << "a,";
- printSimpleType(Out, elemT, true);
- Out << "b) {\n ";
- printType(Out, retT);
- Out << "r;\n";
- Out << " r.field1 = (b > 0 && a > ";
- printLimitValue(*elemT, true, true, Out);
- Out << " - b) || (b < 0 && a < ";
- printLimitValue(*elemT, true, false, Out);
- Out << " - b);\n";
- Out << " r.field0 = r.field1 ? 0 : a + b;\n";
- Out << " return r;\n}\n";
- break;
- }
-}
-
-void CWriter::lowerIntrinsics(Function &F) {
- // This is used to keep track of intrinsics that get generated to a lowered
- // function. We must generate the prototypes before the function body which
- // will only be expanded on first use (by the loop below).
- std::vector<Function*> prototypesToGen;
-
- // Examine all the instructions in this function to find the intrinsics that
- // need to be lowered.
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
- if (CallInst *CI = dyn_cast<CallInst>(I++))
- if (Function *F = CI->getCalledFunction())
- switch (F->getIntrinsicID()) {
- case Intrinsic::not_intrinsic:
- case Intrinsic::vastart:
- case Intrinsic::vacopy:
- case Intrinsic::vaend:
- case Intrinsic::returnaddress:
- case Intrinsic::frameaddress:
- case Intrinsic::setjmp:
- case Intrinsic::longjmp:
- case Intrinsic::prefetch:
- case Intrinsic::powi:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_cmp_ps:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_cmp_pd:
- case Intrinsic::ppc_altivec_lvsl:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- // We directly implement these intrinsics
- break;
- default:
- // If this is an intrinsic that directly corresponds to a GCC
- // builtin, we handle it.
- const char *BuiltinName = "";
-#define GET_GCC_BUILTIN_NAME
-#include "llvm/Intrinsics.gen"
-#undef GET_GCC_BUILTIN_NAME
- // If we handle it, don't lower it.
- if (BuiltinName[0]) break;
-
- // All other intrinsic calls we must lower.
- Instruction *Before = 0;
- if (CI != &BB->front())
- Before = prior(BasicBlock::iterator(CI));
-
- IL->LowerIntrinsicCall(CI);
- if (Before) { // Move iterator to instruction after call
- I = Before; ++I;
- } else {
- I = BB->begin();
- }
- // If the intrinsic got lowered to another call, and that call has
- // a definition then we need to make sure its prototype is emitted
- // before any calls to it.
- if (CallInst *Call = dyn_cast<CallInst>(I))
- if (Function *NewF = Call->getCalledFunction())
- if (!NewF->isDeclaration())
- prototypesToGen.push_back(NewF);
-
- break;
- }
-
- // We may have collected some prototypes to emit in the loop above.
- // Emit them now, before the function that uses them is emitted. But,
- // be careful not to emit them twice.
- std::vector<Function*>::iterator I = prototypesToGen.begin();
- std::vector<Function*>::iterator E = prototypesToGen.end();
- for ( ; I != E; ++I) {
- if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
- Out << '\n';
- printFunctionSignature(*I, true);
- Out << ";\n";
- }
- }
-}
-
-void CWriter::visitCallInst(CallInst &I) {
- if (isa<InlineAsm>(I.getCalledValue()))
- return visitInlineAsm(I);
-
- bool WroteCallee = false;
-
- // Handle intrinsic function calls first...
- if (Function *F = I.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- if (visitBuiltinCall(I, ID, WroteCallee))
- return;
-
- Value *Callee = I.getCalledValue();
-
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-
- // If this is a call to a struct-return function, assign to the first
- // parameter instead of passing it to the call.
- const AttrListPtr &PAL = I.getAttributes();
- bool hasByVal = I.hasByValArgument();
- bool isStructRet = I.hasStructRetAttr();
- if (isStructRet) {
- writeOperandDeref(I.getArgOperand(0));
- Out << " = ";
- }
-
- if (I.isTailCall()) Out << " /*tail*/ ";
-
- if (!WroteCallee) {
- // If this is an indirect call to a struct return function, we need to cast
- // the pointer. Ditto for indirect calls with byval arguments.
- bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
-
- // GCC is a real PITA. It does not permit codegening casts of functions to
- // function pointers if they are in a call (it generates a trap instruction
- // instead!). We work around this by inserting a cast to void* in between
- // the function and the function pointer cast. Unfortunately, we can't just
- // form the constant expression here, because the folder will immediately
- // nuke it.
- //
- // Note finally, that this is completely unsafe. ANSI C does not guarantee
- // that void* and function pointers have the same size. :( To deal with this
- // in the common case, we handle casts where the number of arguments passed
- // matches exactly.
- //
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
- if (CE->isCast())
- if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
- NeedsCast = true;
- Callee = RF;
- }
-
- if (NeedsCast) {
- // Ok, just cast the pointer type.
- Out << "((";
- if (isStructRet)
- printStructReturnPointerFunctionType(Out, PAL,
- cast<PointerType>(I.getCalledValue()->getType()));
- else if (hasByVal)
- printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
- else
- printType(Out, I.getCalledValue()->getType());
- Out << ")(void*)";
- }
- writeOperand(Callee);
- if (NeedsCast) Out << ')';
- }
-
- Out << '(';
-
- bool PrintedArg = false;
- if(FTy->isVarArg() && !FTy->getNumParams()) {
- Out << "0 /*dummy arg*/";
- PrintedArg = true;
- }
-
- unsigned NumDeclaredParams = FTy->getNumParams();
- CallSite CS(&I);
- CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- unsigned ArgNo = 0;
- if (isStructRet) { // Skip struct return argument.
- ++AI;
- ++ArgNo;
- }
-
-
- for (; AI != AE; ++AI, ++ArgNo) {
- if (PrintedArg) Out << ", ";
- if (ArgNo < NumDeclaredParams &&
- (*AI)->getType() != FTy->getParamType(ArgNo)) {
- Out << '(';
- printType(Out, FTy->getParamType(ArgNo),
- /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
- Out << ')';
- }
- // Check if the argument is expected to be passed by value.
- if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
- writeOperandDeref(*AI);
- else
- writeOperand(*AI);
- PrintedArg = true;
- }
- Out << ')';
-}
-
-/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
-/// if the entire call is handled, return false if it wasn't handled, and
-/// optionally set 'WroteCallee' if the callee has already been printed out.
-bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
- bool &WroteCallee) {
- switch (ID) {
- default: {
- // If this is an intrinsic that directly corresponds to a GCC
- // builtin, we emit it here.
- const char *BuiltinName = "";
- Function *F = I.getCalledFunction();
-#define GET_GCC_BUILTIN_NAME
-#include "llvm/Intrinsics.gen"
-#undef GET_GCC_BUILTIN_NAME
- assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
-
- Out << BuiltinName;
- WroteCallee = true;
- return false;
- }
- case Intrinsic::vastart:
- Out << "0; ";
-
- Out << "va_start(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- // Output the last argument to the enclosing function.
- if (I.getParent()->getParent()->arg_empty())
- Out << "vararg_dummy_arg";
- else
- writeOperand(--I.getParent()->getParent()->arg_end());
- Out << ')';
- return true;
- case Intrinsic::vaend:
- if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
- Out << "0; va_end(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- } else {
- Out << "va_end(*(va_list*)0)";
- }
- return true;
- case Intrinsic::vacopy:
- Out << "0; ";
- Out << "va_copy(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ", *(va_list*)";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::returnaddress:
- Out << "__builtin_return_address(";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::frameaddress:
- Out << "__builtin_frame_address(";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::powi:
- Out << "__builtin_powi(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::setjmp:
- Out << "setjmp(*(jmp_buf*)";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::longjmp:
- Out << "longjmp(*(jmp_buf*)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::prefetch:
- Out << "LLVM_PREFETCH((const void *)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ", ";
- writeOperand(I.getArgOperand(2));
- Out << ")";
- return true;
- case Intrinsic::stacksave:
- // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
- // to work around GCC bugs (see PR1809).
- Out << "0; *((void**)&" << GetValueName(&I)
- << ") = __builtin_stack_save()";
- return true;
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_cmp_ps:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_cmp_pd:
- Out << '(';
- printType(Out, I.getType());
- Out << ')';
- // Multiple GCC builtins multiplex onto this intrinsic.
- switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
- default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
- case 0: Out << "__builtin_ia32_cmpeq"; break;
- case 1: Out << "__builtin_ia32_cmplt"; break;
- case 2: Out << "__builtin_ia32_cmple"; break;
- case 3: Out << "__builtin_ia32_cmpunord"; break;
- case 4: Out << "__builtin_ia32_cmpneq"; break;
- case 5: Out << "__builtin_ia32_cmpnlt"; break;
- case 6: Out << "__builtin_ia32_cmpnle"; break;
- case 7: Out << "__builtin_ia32_cmpord"; break;
- }
- if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
- Out << 'p';
- else
- Out << 's';
- if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
- Out << 's';
- else
- Out << 'd';
-
- Out << "(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ")";
- return true;
- case Intrinsic::ppc_altivec_lvsl:
- Out << '(';
- printType(Out, I.getType());
- Out << ')';
- Out << "__builtin_altivec_lvsl(0, (void*)";
- writeOperand(I.getArgOperand(0));
- Out << ")";
- return true;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- Out << GetValueName(I.getCalledFunction()) << "(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ")";
- return true;
- }
-}
-
-// This converts the LLVM constraint string to something GCC is expecting.
-// TODO: work out platform-independent constraints and factor those out
-// of the per-target tables
-// handle multiple constraint codes
-std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
- assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
-
- // Grab the translation table from MCAsmInfo if it exists.
- const MCAsmInfo *TargetAsm;
- std::string Triple = TheModule->getTargetTriple();
- if (Triple.empty())
- Triple = llvm::sys::getHostTriple();
-
- std::string E;
- if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TargetAsm = Match->createMCAsmInfo(Triple);
- else
- return c.Codes[0];
-
- const char *const *table = TargetAsm->getAsmCBE();
-
- // Search the translation table if it exists.
- for (int i = 0; table && table[i]; i += 2)
- if (c.Codes[0] == table[i]) {
- delete TargetAsm;
- return table[i+1];
- }
-
- // Default is identity.
- delete TargetAsm;
- return c.Codes[0];
-}
-
-//TODO: import logic from AsmPrinter.cpp
-static std::string gccifyAsm(std::string asmstr) {
- for (std::string::size_type i = 0; i != asmstr.size(); ++i)
- if (asmstr[i] == '\n')
- asmstr.replace(i, 1, "\\n");
- else if (asmstr[i] == '\t')
- asmstr.replace(i, 1, "\\t");
- else if (asmstr[i] == '$') {
- if (asmstr[i + 1] == '{') {
- std::string::size_type a = asmstr.find_first_of(':', i + 1);
- std::string::size_type b = asmstr.find_first_of('}', i + 1);
- std::string n = "%" +
- asmstr.substr(a + 1, b - a - 1) +
- asmstr.substr(i + 2, a - i - 2);
- asmstr.replace(i, b - i + 1, n);
- i += n.size() - 1;
- } else
- asmstr.replace(i, 1, "%");
- }
- else if (asmstr[i] == '%')//grr
- { asmstr.replace(i, 1, "%%"); ++i;}
-
- return asmstr;
-}
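-
-// Rough examples of the rewriting above: "${0:w}" becomes "%w0", a bare "$1"
-// becomes "%1", a literal "%" is doubled to "%%", and newlines/tabs are
-// escaped to "\n" and "\t".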
-
-// TODO: the assumptions about which constraints consume arguments from the
-// call are likely wrong
-// handle commutativity
-void CWriter::visitInlineAsm(CallInst &CI) {
- InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
- InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
-
- std::vector<std::pair<Value*, int> > ResultVals;
- if (CI.getType() == Type::getVoidTy(CI.getContext()))
- ;
- else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
- ResultVals.push_back(std::make_pair(&CI, (int)i));
- } else {
- ResultVals.push_back(std::make_pair(&CI, -1));
- }
-
- // Fix up the asm string for gcc and emit it.
- Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
- Out << " :";
-
- unsigned ValueCount = 0;
- bool IsFirst = true;
-
- // Convert over all the output constraints.
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
-
- if (I->Type != InlineAsm::isOutput) {
- ++ValueCount;
- continue; // Ignore non-output constraints.
- }
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
- if (!IsFirst)
- Out << ", ";
- IsFirst = false;
-
- // Unpack the dest.
- Value *DestVal;
- int DestValNo = -1;
-
- if (ValueCount < ResultVals.size()) {
- DestVal = ResultVals[ValueCount].first;
- DestValNo = ResultVals[ValueCount].second;
- } else
- DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
- if (I->isEarlyClobber)
- C = "&"+C;
-
- Out << "\"=" << C << "\"(" << GetValueName(DestVal);
- if (DestValNo != -1)
- Out << ".field" << DestValNo; // Multiple retvals.
- Out << ")";
- ++ValueCount;
- }
-
-
- // Convert over all the input constraints.
- Out << "\n :";
- IsFirst = true;
- ValueCount = 0;
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
- if (I->Type != InlineAsm::isInput) {
- ++ValueCount;
- continue; // Ignore non-input constraints.
- }
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
- if (!IsFirst)
- Out << ", ";
- IsFirst = false;
-
- assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
- Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
- Out << "\"" << C << "\"(";
- if (!I->isIndirect)
- writeOperand(SrcVal);
- else
- writeOperandDeref(SrcVal);
- Out << ")";
- }
-
- // Convert over the clobber constraints.
- IsFirst = true;
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
- if (I->Type != InlineAsm::isClobber)
- continue; // Ignore non-clobber constraints.
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
- if (!IsFirst)
- Out << ", ";
- IsFirst = false;
-
- Out << '\"' << C << '"';
- }
-
- Out << ")";
-}
-
-void CWriter::visitAllocaInst(AllocaInst &I) {
- Out << '(';
- printType(Out, I.getType());
- Out << ") alloca(sizeof(";
- printType(Out, I.getType()->getElementType());
- Out << ')';
- if (I.isArrayAllocation()) {
- Out << " * " ;
- writeOperand(I.getOperand(0));
- }
- Out << ')';
-}
-
-void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static) {
-
- // If there are no indices, just print out the pointer.
- if (I == E) {
- writeOperand(Ptr);
- return;
- }
-
- // Find out if the last index is into a vector. If so, we have to print this
- // specially. Since vectors can't have elements of indexable type, only the
- // last index could possibly be of a vector element.
- VectorType *LastIndexIsVector = 0;
- {
- for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
- LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
- }
-
- Out << "(";
-
- // If the last index is into a vector, we can't print it as &a[i][j] because
- // we can't index into a vector with j in GCC. Instead, emit this as
- // (((float*)&a[i])+j)
- if (LastIndexIsVector) {
- Out << "((";
- printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
- Out << ")(";
- }
-
- Out << '&';
-
- // If the first index is 0 (very typical) we can do a number of
- // simplifications to clean up the code.
- Value *FirstOp = I.getOperand();
- if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
- // First index isn't simple, print it the hard way.
- writeOperand(Ptr);
- } else {
- ++I; // Skip the zero index.
-
- // Okay, emit the first operand. If Ptr is something that is already address
- // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
- if (isAddressExposed(Ptr)) {
- writeOperandInternal(Ptr, Static);
- } else if (I != E && (*I)->isStructTy()) {
- // If we didn't already emit the first operand, see if we can print it as
- // P->f instead of "P[0].f"
- writeOperand(Ptr);
- Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- ++I; // eat the struct index as well.
- } else {
- // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
- Out << "(*";
- writeOperand(Ptr);
- Out << ")";
- }
- }
-
- for (; I != E; ++I) {
- if ((*I)->isStructTy()) {
- Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- } else if ((*I)->isArrayTy()) {
- Out << ".array[";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else if (!(*I)->isVectorTy()) {
- Out << '[';
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else {
- // If the last index is into a vector, then print it out as "+j)". This
- // works with the 'LastIndexIsVector' code above.
- if (isa<Constant>(I.getOperand()) &&
- cast<Constant>(I.getOperand())->isNullValue()) {
- Out << "))"; // avoid "+0".
- } else {
- Out << ")+(";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << "))";
- }
- }
- }
- Out << ")";
-}
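-
-// Rough sketch of the GEP lowering above (value and field names are
-// illustrative): for a pointer %p to a struct whose field 1 is an array,
-//   getelementptr %struct.S* %p, i32 0, i32 1, i32 %i
-// comes out roughly as
-//   (&llvm_cbe_p->field1.array[llvm_cbe_i])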
-
-void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
- bool IsVolatile, unsigned Alignment) {
-
- bool IsUnaligned = Alignment &&
- Alignment < TD->getABITypeAlignment(OperandType);
-
- if (!IsUnaligned)
- Out << '*';
- if (IsVolatile || IsUnaligned) {
- Out << "((";
- if (IsUnaligned)
- Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
- printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
- if (IsUnaligned) {
- Out << "; } ";
- if (IsVolatile) Out << "volatile ";
- Out << "*";
- }
- Out << ")";
- }
-
- writeOperand(Operand);
-
- if (IsVolatile || IsUnaligned) {
- Out << ')';
- if (IsUnaligned)
- Out << "->data";
- }
-}
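-
-// Rough sketches of the access expressions built above (pointer name is
-// illustrative): a volatile, naturally aligned i32 access is emitted as
-//   *((unsigned int volatile*)llvm_cbe_p)
-// while an under-aligned (align 1) i32 access goes through a packed wrapper,
-// roughly
-//   ((struct __attribute__ ((packed, aligned(1))) { unsigned int data; }*)llvm_cbe_p)->data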
-
-void CWriter::visitLoadInst(LoadInst &I) {
- writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
- I.getAlignment());
-
-}
-
-void CWriter::visitStoreInst(StoreInst &I) {
- writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
- I.isVolatile(), I.getAlignment());
- Out << " = ";
- Value *Operand = I.getOperand(0);
- Constant *BitMask = 0;
- if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
- if (!ITy->isPowerOf2ByteWidth())
- // We have a bit width that doesn't match an even power-of-2 byte
- // size. Consequently we must & the value with the type's bit mask
- BitMask = ConstantInt::get(ITy, ITy->getBitMask());
- if (BitMask)
- Out << "((";
- writeOperand(Operand);
- if (BitMask) {
- Out << ") & ";
- printConstant(BitMask, false);
- Out << ")";
- }
-}
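-
-// For example, a store of an i17 value is emitted as "((value) & mask)" where
-// mask is the 17-bit all-ones constant (0x1FFFF), keeping the unused high bits
-// of the byte-sized C object clear.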
-
-void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
- printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
- gep_type_end(I), false);
-}
-
-void CWriter::visitVAArgInst(VAArgInst &I) {
- Out << "va_arg(*(va_list*)";
- writeOperand(I.getOperand(0));
- Out << ", ";
- printType(Out, I.getType());
- Out << ");\n ";
-}
-
-void CWriter::visitInsertElementInst(InsertElementInst &I) {
- Type *EltTy = I.getType()->getElementType();
- writeOperand(I.getOperand(0));
- Out << ";\n ";
- Out << "((";
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(&I) << "))[";
- writeOperand(I.getOperand(2));
- Out << "] = (";
- writeOperand(I.getOperand(1));
- Out << ")";
-}
-
-void CWriter::visitExtractElementInst(ExtractElementInst &I) {
- // We know that our operand is not inlined.
- Out << "((";
- Type *EltTy =
- cast<VectorType>(I.getOperand(0)->getType())->getElementType();
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
- writeOperand(I.getOperand(1));
- Out << "]";
-}
-
-void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- Out << "(";
- printType(Out, SVI.getType());
- Out << "){ ";
- VectorType *VT = SVI.getType();
- unsigned NumElts = VT->getNumElements();
- Type *EltTy = VT->getElementType();
-
- for (unsigned i = 0; i != NumElts; ++i) {
- if (i) Out << ", ";
- int SrcVal = SVI.getMaskValue(i);
- if ((unsigned)SrcVal >= NumElts*2) {
- Out << " 0/*undef*/ ";
- } else {
- Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
- if (isa<Instruction>(Op)) {
- // Do an extractelement of this value from the appropriate input.
- Out << "((";
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(Op)
- << "))[" << (SrcVal & (NumElts-1)) << "]";
- } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
- Out << "0";
- } else {
- printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
- (NumElts-1)),
- false);
- }
- }
- }
- Out << "}";
-}
-
-void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
- // Start by copying the entire aggregate value into the result variable.
- writeOperand(IVI.getOperand(0));
- Out << ";\n ";
-
- // Then do the insert to update the field.
- Out << GetValueName(&IVI);
- for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
- i != e; ++i) {
- Type *IndexedTy =
- ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
- makeArrayRef(b, i+1));
- if (IndexedTy->isArrayTy())
- Out << ".array[" << *i << "]";
- else
- Out << ".field" << *i;
- }
- Out << " = ";
- writeOperand(IVI.getOperand(1));
-}
-
-void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
- Out << "(";
- if (isa<UndefValue>(EVI.getOperand(0))) {
- Out << "(";
- printType(Out, EVI.getType());
- Out << ") 0/*UNDEF*/";
- } else {
- Out << GetValueName(EVI.getOperand(0));
- for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
- i != e; ++i) {
- Type *IndexedTy =
- ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
- makeArrayRef(b, i+1));
- if (IndexedTy->isArrayTy())
- Out << ".array[" << *i << "]";
- else
- Out << ".field" << *i;
- }
- }
- Out << ")";
-}
-
-//===----------------------------------------------------------------------===//
-// External Interface declaration
-//===----------------------------------------------------------------------===//
-
-bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
-
- PM.add(createGCLoweringPass());
- PM.add(createLowerInvokePass());
- PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
- PM.add(new CWriter(o));
- PM.add(createGCInfoDeleter());
- return false;
-}
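As an aside on the C backend removed above: its writeMemoryAccess helper emitted unaligned accesses through a packed struct so the generated C never dereferenced a misaligned pointer directly. A minimal standalone sketch of that pattern, assuming GCC/Clang attribute syntax and illustrative names (the deleted code additionally folded in volatile and used the operand's real alignment in aligned(...)):

#include <cstdint>

// Reading through a member of a packed, aligned(1) struct tells the compiler
// the pointer may be misaligned, so it emits a safe (byte-wise or
// hardware-unaligned) load instead of assuming natural alignment.
struct __attribute__((packed, aligned(1))) Unaligned32 {
  uint32_t data;
};

uint32_t load_unaligned_u32(const void *P) {
  return static_cast<const Unaligned32 *>(P)->data;
}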
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
deleted file mode 100644
index 96ae49f01fc0..000000000000
--- a/lib/Target/CBackend/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-add_llvm_target(CBackend
- CBackend.cpp
- )
-
-add_llvm_library_dependencies(LLVMCBackend
- LLVMAnalysis
- LLVMCBackendInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
-add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
deleted file mode 100644
index 4f1ca974ded9..000000000000
--- a/lib/Target/CBackend/CTargetMachine.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the TargetMachine that is used by the C backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CTARGETMACHINE_H
-#define CTARGETMACHINE_H
-
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-
-namespace llvm {
-
-struct CTargetMachine : public TargetMachine {
- CTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, TT, CPU, FS) {}
-
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
-
- virtual const TargetData *getTargetData() const { return 0; }
-};
-
-extern Target TheCBackendTarget;
-
-} // End llvm namespace
-
-
-#endif
diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
deleted file mode 100644
index e8274ff9ce5a..000000000000
--- a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CTargetMachine.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheCBackendTarget;
-
-extern "C" void LLVMInitializeCBackendTargetInfo() {
- RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
-}
-
-extern "C" void LLVMInitializeCBackendTargetMC() {}
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 8e616bebd532..000000000000
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMCBackendInfo
- CBackendTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMCBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile
deleted file mode 100644
index d4d5e15b40bb..000000000000
--- a/lib/Target/CBackend/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCBackendInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 030f8089abf7..5913a9c4ccdd 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -3,58 +3,18 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
- TargetFrameLowering.cpp
TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
+ TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
+ TargetMachineC.cpp
TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMTarget
- LLVMCore
- LLVMMC
- LLVMSupport
- )
-
-set(LLVM_ENUM_ASM_PRINTERS "")
-set(LLVM_ENUM_ASM_PARSERS "")
-set(LLVM_ENUM_DISASSEMBLERS "")
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
- set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} )
- file(GLOB asmp_file "${td}/*AsmPrinter.cpp")
- if( asmp_file )
- set(LLVM_ENUM_ASM_PRINTERS
- "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
- endif()
- if( EXISTS ${td}/AsmParser/CMakeLists.txt )
- set(LLVM_ENUM_ASM_PARSERS
- "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
- endif()
- if( EXISTS ${td}/Disassembler/CMakeLists.txt )
- set(LLVM_ENUM_DISASSEMBLERS
- "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n")
- endif()
-endforeach(t)
-
-# Produce llvm/Config/AsmPrinters.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
- )
-
-# Produce llvm/Config/AsmParsers.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
- )
-
-# Produce llvm/Config/Disassemblers.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def
- )
+endforeach()
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index 158fb3eacce3..cf4f796ec2fb 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -1,12 +1,12 @@
set(LLVM_TARGET_DEFINITIONS SPU.td)
-llvm_tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SPUGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(SPUGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SPUGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SPUGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(SPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(CellSPUCommonTableGen)
add_llvm_target(CellSPUCodeGen
@@ -16,6 +16,7 @@ add_llvm_target(CellSPUCodeGen
SPUISelDAGToDAG.cpp
SPUISelLowering.cpp
SPUFrameLowering.cpp
+ SPUMachineFunction.cpp
SPURegisterInfo.cpp
SPUSubtarget.cpp
SPUTargetMachine.cpp
@@ -23,17 +24,5 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUCodeGen
- LLVMAsmPrinter
- LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
index 9468aee067a3..cdb4099ffbca 100644
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ b/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -1,5 +1,5 @@
//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
new file mode 100644
index 000000000000..277620bf4e59
--- /dev/null
+++ b/lib/Target/CellSPU/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = CellSPU
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = CellSPUCodeGen
+parent = CellSPU
+required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index d41fe934e2c5..0027bdbf6ca1 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMMC
- )
-
add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..71e5bbc629ca
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CellSPUDesc
+parent = CellSPU
+required_libraries = CellSPUInfo MC
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
index 8c1176a9d028..4bad37eacaf7 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -14,6 +14,8 @@
#include "SPUMCAsmInfo.h"
using namespace llvm;
+void SPULinuxMCAsmInfo::anchor() { }
+
SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
index 7f850d347f56..f786147b9267 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -20,7 +20,9 @@
namespace llvm {
class Target;
- struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ class SPULinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
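The anchor() method added above is LLVM's usual vtable-anchoring idiom: give the class one out-of-line virtual function so its vtable and type info are emitted in a single object file instead of weakly in every translation unit that uses it. A minimal standalone illustration with hypothetical names:

// Header: declare the first non-inline virtual member...
class Anchored {
  virtual void anchor();
public:
  virtual ~Anchored() {}
};

// Exactly one .cpp file: ...and define it there. That file becomes the home
// of the vtable, avoiding weak duplicate vtable emissions elsewhere.
void Anchored::anchor() {}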
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index d5af2a88aed1..8450e2c6634c 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,6 +18,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -62,11 +63,12 @@ static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM);
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
index a3717b0bfc10..d26449e8908f 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -15,9 +15,7 @@
#define SPUMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheCellSPUTarget;
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index b51fbc7a5197..c660131706cb 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -1,4 +1,4 @@
-//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
index 8327fe03d7f8..e835b9cac8e1 100644
--- a/lib/Target/CellSPU/SPU.td
+++ b/lib/Target/CellSPU/SPU.td
@@ -1,5 +1,5 @@
-//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
-//
+//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
index 3031fda54381..e051e047333a 100644
--- a/lib/Target/CellSPU/SPU128InstrInfo.td
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -1,9 +1,9 @@
-//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
//
// Cell SPU 128-bit operations
//
//===----------------------------------------------------------------------===//
-
+
// zext 32->128: Zero extend 32-bit to 128-bit
def : Pat<(i128 (zext R32C:$rSrc)),
(ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index f340edfb0f86..bea33b5362d2 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -1,4 +1,4 @@
-//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
//
// Cell SPU 64-bit operations
//
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 90b5270a9dae..14021fef05d9 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -248,7 +248,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
switch (MO.getType()) {
case MachineOperand::MO_Immediate:
report_fatal_error("printOp() does not handle immediate values");
- return;
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index 04fa2ae866d6..9f9692bf67fe 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,10 +1,10 @@
//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the STI Cell SPU architecture.
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 093f99f28711..fac806e1b0ea 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUFrameLowering.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
@@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
- (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
index b837f2cf94e1..11c52818dd9c 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -1,4 +1,4 @@
-//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
+//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index a297d036f03e..c27caeae7d45 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
@@ -91,8 +90,6 @@ namespace {
short s_val = (short) i_val;
return i_val == s_val;
}
-
- return false;
}
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
@@ -216,7 +213,7 @@ namespace {
HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment));
+ false, false, false, Alignment));
CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -287,8 +284,8 @@ namespace {
llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
#else
SelectAddrIdxOnly(Op, Op, Op0, Op1);
-#endif
break;
+#endif
}
OutOps.push_back(Op0);
@@ -327,7 +324,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
Base = CurDAG->getTargetConstant( val , MVT::i32);
Index = Zero;
- return true; break;
+ return true;
case ISD::ConstantPool:
case ISD::GlobalAddress:
report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
@@ -579,22 +576,16 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
switch( VT.SimpleTy ) {
case MVT::i8:
return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
- break;
case MVT::i16:
return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
- break;
case MVT::i32:
return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
- break;
case MVT::f32:
return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
- break;
case MVT::i64:
return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
- break;
case MVT::i128:
return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
- break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -602,11 +593,10 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
case MVT::v2i64:
case MVT::v2f64:
return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
- break;
default:
assert( false && "add a new case here" );
+ return SDValue();
}
- return SDValue();
}
//! Convert the operand from a target-independent to a target-specific node
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ac33111f74ae..062374127e2f 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -27,19 +27,14 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
-// Used in getTargetNodeName() below
namespace {
- std::map<unsigned, const char *> node_names;
-
// Byte offset of the preferred slot (counted from the MSB)
int prefslotOffset(EVT VT) {
int retval=0;
@@ -84,8 +79,9 @@ namespace {
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Op.getDebugLoc());
return CallInfo.first;
@@ -296,12 +292,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
setOperationAction(ISD::CTLZ , MVT::i8, Promote);
setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
@@ -424,6 +430,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ // Expand all trunc stores
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
+
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -434,6 +447,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+
setOperationAction(ISD::AND, MVT::v16i8, Custom);
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
@@ -462,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
}
-const char *
-SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- if (node_names.empty()) {
- node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
- node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
- node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
- node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
- node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
- node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
- node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
- node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
- node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
- node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
- node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
- node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
- node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
- node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
- node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
- node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
- node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
- node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
- }
-
- std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
-
- return ((i != node_names.end()) ? i->second : 0);
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+ case SPUISD::Hi: return "SPUISD::Hi";
+ case SPUISD::Lo: return "SPUISD::Lo";
+ case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+ case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+ case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+ case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+ case SPUISD::CALL: return "SPUISD::CALL";
+ case SPUISD::SHUFB: return "SPUISD::SHUFB";
+ case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+ case SPUISD::CNTB: return "SPUISD::CNTB";
+ case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+ case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+ case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+ case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+ case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+ case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+ case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+ case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+ case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+ case SPUISD::SELB: return "SPUISD::SELB";
+ case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+ case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+ case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+ }
}
//===----------------------------------------------------------------------===//
@@ -658,7 +667,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Do the load as a i128 to allow possible shifting
SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ LN->isVolatile(), LN->isNonTemporal(), false, 16);
// When the size is not greater than alignment we get all data with just
// one load
@@ -695,7 +704,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
basePtr,
DAG.getConstant(16, PtrVT)),
highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ LN->isVolatile(), LN->isNonTemporal(), false,
+ 16);
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
high.getValue(1));
@@ -850,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Load the lower part of the memory to which to store.
SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
// if we don't need to store over the 16 byte boundary, one store suffices
if (alignment >= StVT.getSizeInBits()/8) {
@@ -950,7 +961,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
DAG.getConstant( 16, PtrVT)),
highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
+ SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
hi.getValue(1));
@@ -1017,7 +1029,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
llvm_unreachable("LowerConstantPool: Relocation model other than static"
" not supported.");
- return SDValue();
}
//! Alternate entry point for generating the address of a constant pool entry
@@ -1048,7 +1059,6 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
llvm_unreachable("LowerJumpTable: Relocation model other than static"
" not supported.");
- return SDValue();
}
static SDValue
@@ -1076,8 +1086,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
"not supported.");
/*NOTREACHED*/
}
-
- return SDValue();
}
//! Custom lower double precision floating point constants
@@ -1185,7 +1193,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
ArgOffset += StackSlotSize;
}
@@ -1198,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
// FIXME: we should be able to query the argument registers from
// tablegen generated code.
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
@@ -1212,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
};
// size of ArgRegs array
- unsigned NumArgRegs = 77;
+ const unsigned NumArgRegs = 77;
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
@@ -1257,7 +1265,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1675,7 +1683,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(Value32, MVT::i32);
return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
- break;
}
case MVT::v2f64: {
uint64_t f64val = uint64_t(SplatBits);
@@ -1685,7 +1692,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(f64val, MVT::i64);
return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
- break;
}
case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
@@ -1712,8 +1718,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
}
-
- return SDValue();
}
/*!
@@ -1743,9 +1747,11 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Both upper and lower are special, lower to a constant pool load:
if (lower_special && upper_special) {
- SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
- SplatValCN, SplatValCN);
+ SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
+ SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ UpperVal, LowerVal, UpperVal, LowerVal);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
}
SDValue LO32;
@@ -1985,8 +1991,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
}
}
-
- return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
@@ -2020,8 +2024,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
int elt_byte = EltNo * VT.getSizeInBits() / 8;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: {
prefslot_begin = prefslot_end = 3;
break;
@@ -2199,8 +2202,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
switch (Opc) {
default:
llvm_unreachable("Unhandled i8 math operator");
- /*NOTREACHED*/
- break;
case ISD::ADD: {
// 8-bit addition: Promote the arguments up to 16-bits and truncate
// the result:
@@ -2285,11 +2286,8 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- break;
}
}
-
- return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
@@ -2354,8 +2352,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: {
SDValue N = Op.getOperand(0);
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
@@ -3161,7 +3158,6 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -3227,7 +3223,7 @@ bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
return (V > -(1 << 18) && V < (1 << 18) - 1);
}
-bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
return false;
}
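One note on the LowerV2I64Splat change above: rather than feeding a 64-bit constant straight into a v2i64 BUILD_VECTOR, the fixed code builds a v4i32 vector from the value's two 32-bit halves and bitcasts it back. A small sketch of that decomposition, assuming the SPU's big-endian element layout and hypothetical names:

#include <cstdint>

// { hi, lo, hi, lo } viewed as a big-endian v2i64 is two copies of SplatVal.
void splitSplat(uint64_t SplatVal, uint32_t Elts[4]) {
  const uint32_t Hi = static_cast<uint32_t>(SplatVal >> 32);
  const uint32_t Lo = static_cast<uint32_t>(SplatVal);
  Elts[0] = Hi; Elts[1] = Lo; Elts[2] = Hi; Elts[3] = Lo;
}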
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index aa4a1687278a..e3db7b2f1fbc 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -15,9 +15,9 @@
#ifndef SPU_ISELLOWERING_H
#define SPU_ISELLOWERING_H
+#include "SPU.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SPU.h"
namespace llvm {
namespace SPUISD {
@@ -121,7 +121,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -162,7 +161,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
index 5e268f8767c2..b495537fc2c8 100644
--- a/lib/Target/CellSPU/SPUInstrBuilder.h
+++ b/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -1,4 +1,4 @@
-//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
index bdbe2552dcdd..cd3f42214345 100644
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -1,10 +1,10 @@
-//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
-//
+//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 007bc0e02c7e..759923d7bb42 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index bc1ba71f7a45..85e5821aefa1 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -1,4 +1,4 @@
-//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,8 @@
#define SPU_INSTRUCTIONINFO_H
#include "SPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SPUGenInstrInfo.inc"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp
new file mode 100644
index 000000000000..3e948d071d63
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMachineFunction.cpp
@@ -0,0 +1,14 @@
+//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMachineFunction.h"
+
+using namespace llvm;
+
+void SPUFunctionInfo::anchor() { }
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
index 3ef3ccbcaaee..399684bb0887 100644
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -21,7 +21,8 @@ namespace llvm {
/// SPUFunctionInfo - Cell SPU target-specific information for each
/// MachineFunction
class SPUFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
+
/// UsesLR - Indicates whether LR is used in the current function.
///
bool UsesLR;
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index ed7129e33291..9a5c3976afbe 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -1,4 +1,4 @@
-//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
+//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===//
//
// Cell SPU math operations
//
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index a6e621f36b35..a47e9ef0167c 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -1,4 +1,4 @@
-//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
index e2bd2d7f4100..7c58041e3b84 100644
--- a/lib/Target/CellSPU/SPUNopFiller.cpp
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -1,4 +1,4 @@
-//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 96cde51709ec..6f8deef5530f 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -1,10 +1,10 @@
-//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
-//
+//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
// Cell SPU Instruction Operands:
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index bbac6fd0be54..1b2da5f50c81 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
-#include "SPU.h"
#include "SPURegisterInfo.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
@@ -197,11 +197,11 @@ SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
return &SPU::R32CRegClass;
}
-const unsigned *
+const uint16_t *
SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
{
// Cell ABI calling convention
- static const unsigned SPU_CalleeSaveRegs[] = {
+ static const uint16_t SPU_CalleeSaveRegs[] = {
SPU::R80, SPU::R81, SPU::R82, SPU::R83,
SPU::R84, SPU::R85, SPU::R86, SPU::R87,
SPU::R88, SPU::R89, SPU::R90, SPU::R91,
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index b7818a47abd7..e5ab22422502 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -57,7 +57,7 @@ namespace llvm {
}
//! Return the array of callee-saved registers
- virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+ virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
//! Allow for scavenging, so we can get scratch registers when needed.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index e16f51ff0e02..f27b042edd63 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -1,10 +1,10 @@
-//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
-//
+//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
index 9cd3c2327df0..9ccd0844e48e 100644
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -1,10 +1,10 @@
-//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
-//
+//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 43335abf0ac2..eec2d250be7f 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,6 @@
#include "SPU.h"
#include "SPURegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 93a7f6e36501..21f6b25bf256 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -11,17 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUTargetMachine.h"
+#include "SPU.h"
#include "llvm/PassManager.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-extern "C" void LLVMInitializeCellSPUTarget() {
+extern "C" void LLVMInitializeCellSPUTarget() {
// Register the target.
RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
}
@@ -34,8 +33,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
@@ -49,16 +50,34 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// SPU Code Generator Pass Configuration Options.
+class SPUPassConfig : public TargetPassConfig {
+public:
+ SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SPUTargetMachine &getSPUTargetMachine() const {
+ return getTM<SPUTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SPUPassConfig(this, PM);
+}
+
+bool SPUPassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createSPUISelDag(*this));
+ PM.add(createSPUISelDag(getSPUTargetMachine()));
return false;
}
// passes to run just before printing the assembly
-bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool SPUPassConfig::addPreEmitPass() {
// load the TCE instruction scheduler, if available via
// loaded plugins
typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
@@ -69,6 +88,6 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
PM.add(schedulerCreator("cellspu"));
//align instructions with nops/lnops for dual issue
- PM.add(createSPUNopFillerPass(*this));
+ PM.add(createSPUNopFillerPass(getSPUTargetMachine()));
return true;
}
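The SPUTargetMachine.cpp change above moves the addInstSelector/addPreEmitPass hooks off the target machine and onto a pass-configuration object that the machine returns from createPassConfig. A generic sketch of that shape, not the real LLVM API, with hypothetical names:

// The base object owns the common pipeline skeleton; targets override hooks.
struct PassConfigBase {
  virtual ~PassConfigBase() {}
  virtual bool addInstSelector() { return false; }
  virtual bool addPreEmitPass() { return false; }
};

struct MyPassConfig : public PassConfigBase {
  virtual bool addInstSelector() { /* install the DAG selector */ return false; }
  virtual bool addPreEmitPass() { /* e.g. nop filler for dual issue */ return true; }
};

struct MyTargetMachine {
  // Mirrors createPassConfig above; the caller takes ownership of the result.
  PassConfigBase *createPassConfig() { return new MyPassConfig(); }
};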
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index fffe77cabba3..3e5d38c919c1 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
-class TargetFrameLowering;
/// SPUTargetMachine
///
@@ -39,8 +36,9 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
@@ -60,7 +58,7 @@ public:
return NULL;
}
- virtual const SPUTargetLowering *getTargetLowering() const {
+ virtual const SPUTargetLowering *getTargetLowering() const {
return &TLInfo;
}
@@ -71,7 +69,7 @@ public:
virtual const SPURegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
-
+
virtual const TargetData *getTargetData() const {
return &DataLayout;
}
@@ -79,11 +77,9 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
-
+
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 3f2d6b09adad..6a98f95db664 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..6937e705ff7f
--- /dev/null
+++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CellSPUInfo
+parent = CellSPU
+required_libraries = MC Support Target
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 95b6058243dc..515e1dd7e39f 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -1,12 +1,5 @@
-add_llvm_target(CppBackend
+add_llvm_target(CppBackendCodeGen
CPPBackend.cpp
)
-add_llvm_library_dependencies(LLVMCppBackend
- LLVMCore
- LLVMCppBackendInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 394ea2bfea02..69f0ff87eda0 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -33,8 +33,9 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <algorithm>
-#include <set>
+#include <cstdio>
#include <map>
+#include <set>
using namespace llvm;
static cl::opt<std::string>
@@ -189,13 +190,24 @@ static std::string getTypePrefix(Type *Ty) {
case Type::VectorTyID: return "packed_";
default: return "other_";
}
- return "unknown_";
}
void CppWriter::error(const std::string& msg) {
report_fatal_error(msg);
}
+static inline std::string ftostr(const APFloat& V) {
+ std::string Buf;
+ if (&V.getSemantics() == &APFloat::IEEEdouble) {
+ raw_string_ostream(Buf) << V.convertToDouble();
+ return Buf;
+ } else if (&V.getSemantics() == &APFloat::IEEEsingle) {
+ raw_string_ostream(Buf) << (double)V.convertToFloat();
+ return Buf;
+ }
+ return "<unknown format in ftostr>"; // error
+}
+
// printCFP - Print a floating point constant .. very carefully :)
// This makes sure that conversion to/from floating yields the same binary
// result so that we don't lose precision.
@@ -301,7 +313,6 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
switch (VisType) {
- default: llvm_unreachable("Unknown GVar visibility");
case GlobalValue::DefaultVisibility:
Out << "GlobalValue::DefaultVisibility";
break;
@@ -443,7 +454,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
unsigned index = PAL.getSlot(i).Index;
Attributes attrs = PAL.getSlot(i).Attrs;
- Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = Attribute::None ";
#define HANDLE_ATTR(X) \
if (attrs & Attribute::X) \
Out << " | Attribute::" #X; \
@@ -678,11 +689,6 @@ void CppWriter::printConstant(const Constant *CV) {
std::string constName(getCppName(CV));
std::string typeName(getCppName(CV->getType()));
- if (isa<GlobalValue>(CV)) {
- // Skip variables and functions, we emit them elsewhere
- return;
- }
-
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
std::string constValue = CI->getValue().toString(10, true);
Out << "ConstantInt* " << constName
@@ -700,38 +706,17 @@ void CppWriter::printConstant(const Constant *CV) {
printCFP(CFP);
Out << ";";
} else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- if (CA->isString() &&
- CA->getType()->getElementType() ==
- Type::getInt8Ty(CA->getContext())) {
- Out << "Constant* " << constName <<
- " = ConstantArray::get(mod->getContext(), \"";
- std::string tmp = CA->getAsString();
- bool nullTerminate = false;
- if (tmp[tmp.length()-1] == 0) {
- tmp.erase(tmp.length()-1);
- nullTerminate = true;
- }
- printEscapedString(tmp);
- // Determine if we want null termination or not.
- if (nullTerminate)
- Out << "\", true"; // Indicate that the null terminator should be
- // added.
- else
- Out << "\", false";// No null terminator
- Out << ");";
- } else {
- Out << "std::vector<Constant*> " << constName << "_elems;";
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
nl(Out);
- unsigned N = CA->getNumOperands();
- for (unsigned i = 0; i < N; ++i) {
- printConstant(CA->getOperand(i)); // recurse to print operands
- Out << constName << "_elems.push_back("
- << getCppName(CA->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName << " = ConstantArray::get("
- << typeName << ", " << constName << "_elems);";
}
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
} else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
Out << "std::vector<Constant*> " << constName << "_fields;";
nl(Out);
@@ -744,14 +729,14 @@ void CppWriter::printConstant(const Constant *CV) {
}
Out << "Constant* " << constName << " = ConstantStruct::get("
<< typeName << ", " << constName << "_fields);";
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ } else if (const ConstantVector *CVec = dyn_cast<ConstantVector>(CV)) {
Out << "std::vector<Constant*> " << constName << "_elems;";
nl(Out);
- unsigned N = CP->getNumOperands();
+ unsigned N = CVec->getNumOperands();
for (unsigned i = 0; i < N; ++i) {
- printConstant(CP->getOperand(i));
+ printConstant(CVec->getOperand(i));
Out << constName << "_elems.push_back("
- << getCppName(CP->getOperand(i)) << ");";
+ << getCppName(CVec->getOperand(i)) << ");";
nl(Out);
}
Out << "Constant* " << constName << " = ConstantVector::get("
@@ -759,6 +744,41 @@ void CppWriter::printConstant(const Constant *CV) {
} else if (isa<UndefValue>(CV)) {
Out << "UndefValue* " << constName << " = UndefValue::get("
<< typeName << ");";
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CV)) {
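+ // Strings use the compact getString form; other sequentials are rebuilt
+ // element by element below.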
+ if (CDS->isString()) {
+ Out << "Constant *" << constName <<
+ " = ConstantDataArray::getString(mod->getContext(), \"";
+ StringRef Str = CDS->getAsString();
+ bool nullTerminate = false;
+ if (Str.back() == 0) {
+ Str = Str.drop_back();
+ nullTerminate = true;
+ }
+ printEscapedString(Str);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true);";
+ else
+ Out << "\", false);";// No null terminator
+ } else {
+ // TODO: Could generate more efficient code by emitting CDS calls instead.
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ for (unsigned i = 0; i != CDS->getNumElements(); ++i) {
+ Constant *Elt = CDS->getElementAsConstant(i);
+ printConstant(Elt);
+ Out << constName << "_elems.push_back(" << getCppName(Elt) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName;
+
+ if (isa<ArrayType>(CDS->getType()))
+ Out << " = ConstantArray::get(";
+ else
+ Out << " = ConstantVector::get(";
+ Out << typeName << ", " << constName << "_elems);";
+ }
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
Out << "std::vector<Constant*> " << constName << "_indices;";
@@ -1083,10 +1103,10 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- unsigned NumCases = SI->getNumCases();
- for (unsigned i = 1; i < NumCases; ++i) {
- const ConstantInt* CaseVal = SI->getCaseValue(i);
- const BasicBlock* BB = SI->getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ const ConstantInt* CaseVal = i.getCaseValue();
+ const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
<< getOpName(BB) << ");";
@@ -1135,11 +1155,6 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
break;
}
- case Instruction::Unwind: {
- Out << "new UnwindInst("
- << bbname << ");";
- break;
- }
case Instruction::Unreachable: {
Out << "new UnreachableInst("
<< "mod->getContext(), "
@@ -1354,7 +1369,7 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(0 && "Unreachable"); break;
+ default: llvm_unreachable("Unreachable");
}
Out << "(" << opNames[0] << ", "
<< getCppName(cst->getType()) << ", \"";
@@ -2049,8 +2064,6 @@ bool CppWriter::runOnModule(Module &M) {
fname = "makeLLVMType";
printType(fname,tgtname);
break;
- default:
- error("Invalid generation option");
}
return false;
@@ -2065,7 +2078,6 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 287e53727139..92bca6c3c770 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,14 +23,14 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, TT, CPU, FS) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, TT, CPU, FS, Options) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt
new file mode 100644
index 000000000000..122b5e7502fc
--- /dev/null
+++ b/lib/Target/CppBackend/LLVMBuild.txt
@@ -0,0 +1,31 @@
+;===- ./lib/Target/CppBackend/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo
+
+[component_0]
+type = TargetGroup
+name = CppBackend
+parent = Target
+
+[component_1]
+type = Library
+name = CppBackendCodeGen
+parent = CppBackend
+required_libraries = Core CppBackendInfo Support Target
+add_to_library_groups = CppBackend
diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile
index d75f4e872265..efc7463fda3d 100644
--- a/lib/Target/CppBackend/Makefile
+++ b/lib/Target/CppBackend/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMCppBackend
+LIBRARYNAME = LLVMCppBackendCodeGen
DIRS = TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index 7165d8fdf2cd..f82d72e378cb 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -3,8 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCppBackendInfo
- LLVMMC
- LLVMTarget
- )
diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..d4dfc3ef0406
--- /dev/null
+++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CppBackend/TargetInfo/LLVMBuild.txt ---------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CppBackendInfo
+parent = CppBackend
+required_libraries = MC Support Target
+add_to_library_groups = CppBackend
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
new file mode 100644
index 000000000000..8a49cd8105a7
--- /dev/null
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -0,0 +1,37 @@
+set(LLVM_TARGET_DEFINITIONS Hexagon.td)
+
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
+add_public_tablegen_target(HexagonCommonTableGen)
+
+add_llvm_target(HexagonCodeGen
+ HexagonAsmPrinter.cpp
+ HexagonCFGOptimizer.cpp
+ HexagonCallingConvLower.cpp
+ HexagonExpandPredSpillCode.cpp
+ HexagonFrameLowering.cpp
+ HexagonHardwareLoops.cpp
+ HexagonISelDAGToDAG.cpp
+ HexagonISelLowering.cpp
+ HexagonInstrInfo.cpp
+ HexagonMCInstLower.cpp
+ HexagonPeephole.cpp
+ HexagonRegisterInfo.cpp
+ HexagonRemoveSZExtArgs.cpp
+ HexagonSelectionDAGInfo.cpp
+ HexagonSplitTFRCondSets.cpp
+ HexagonSubtarget.cpp
+ HexagonTargetMachine.cpp
+ HexagonTargetObjectFile.cpp
+ HexagonVLIWPacketizer.cpp
+)
+
+add_subdirectory(TargetInfo)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 000000000000..43858b9624f1
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,74 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class MachineInstr;
+ class MCInst;
+ class HexagonAsmPrinter;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonPacketizer();
+
+/* TODO: object output.
+ MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+*/
+/* TODO: assembler input.
+ TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &);
+*/
+ void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ HexagonAsmPrinter &AP);
+} // end namespace llvm;
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of instructions in a packet (each instruction is one word).
+#define HEXAGON_PACKET_SIZE 4
+
+#endif
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 000000000000..4a50d1609308
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,72 @@
+//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//===----------------------------------------------------------------------===//
+
+// Hexagon Architectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+def HexagonInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries Itin,
+ list<SubtargetFeature> Features>
+ : Processor<Name, Itin, Features>;
+
+def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
+
+// Hexagon Uses the MC printer for assembler output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def HexagonAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+
+ let AssemblyWriters = [HexagonAsmWriter];
+}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 000000000000..2cc8b814a022
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,313 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonMCInst.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "InstPrinter/HexagonInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV) const {
+ // For basic block level alignment, use ".falign".
+ if (!GV) {
+ OutStreamer.EmitRawText(StringRef("\t.falign"));
+ return;
+ }
+
+ AsmPrinter::EmitAlignment(NumBits, GV);
+}
+
+void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ default: llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_Register:
+ O << HexagonInstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ // FIXME: PIC relocation model.
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // Computing the address of a global symbol, not calling it.
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+}
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that
+// is only reachable by a fall through. That works for all cases except
+// for the case in which the basic block is reachable by a fall through but
+// also through an indirect branch from a jump table. In this case, the jump
+// table will contain a label not defined by AsmPrinter.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken()) {
+ return false;
+ }
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ llvm_unreachable("Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ llvm_unreachable("Unimplemented");
+
+ return false;
+}
+
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ llvm_unreachable("Unimplemented");
+}
+
+
+/// EmitInstruction -- Print out a single Hexagon MI to the current output
+/// stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->isBundle()) {
+ std::vector<const MachineInstr*> BundleMIs;
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI;
+ ++MII;
+ unsigned int IgnoreCount = 0;
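+ // Gather the instructions in this bundle, counting DBG_VALUE and
+ // IMPLICIT_DEF instructions separately so the bundle-size check below holds.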
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ const MachineInstr *MInst = MII;
+ if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ IgnoreCount++;
+ ++MII;
+ continue;
+ }
+ //BundleMIs.push_back(&*MII);
+ BundleMIs.push_back(MInst);
+ ++MII;
+ }
+ unsigned Size = BundleMIs.size();
+ assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
+ for (unsigned Index = 0; Index < Size; Index++) {
+ HexagonMCInst MCI;
+ MCI.setStartPacket(Index == 0);
+ MCI.setEndPacket(Index == (Size-1));
+
+ HexagonLowerToMC(BundleMIs[Index], MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+ }
+ else {
+ HexagonMCInst MCI;
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ MCI.setStartPacket(true);
+ MCI.setEndPacket(true);
+ }
+ HexagonLowerToMC(MI, MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+ int OpNo, raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << HexagonInstPrinter::getRegisterName(MO1.getReg())
+ << " + #"
+ << MO2.getImm();
+}
+
+
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
+ "Expecting global address");
+
+ O << *Mang->getSymbol(MO.getGlobal());
+ if (MO.getOffset() != 0) {
+ O << " + ";
+ O << MO.getOffset();
+ }
+}
+
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetJTISymbol(MO.getIndex());
+}
+
+void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) &&
+ "Expecting constant pool index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetCPISymbol(MO.getIndex());
+}
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return(new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return NULL;
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
new file mode 100755
index 000000000000..bc2af636124c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -0,0 +1,165 @@
+//===-- HexagonAsmPrinter.h - Print machine code to a Hexagon .s file -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hexagon Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONASMPRINTER_H
+#define HEXAGONASMPRINTER_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Assembly Printer";
+ }
+
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+ virtual void EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV = 0) const;
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. It prints the given machine
+ /// instruction to the output stream.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+
+ // void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero,
+ raw_ostream &O) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+ O << getRegisterName(RegNo);
+ }
+
+ void printImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << value;
+ }
+
+ void printNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << -value;
+ }
+
+ void printMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << (int) MO2.getImm();
+ }
+
+ void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << ", #"
+ << MO2.getImm();
+ }
+
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
+#if 0
+ void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O);
+#endif
+
+ void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+ raw_ostream &O);
+
+ void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+
+#if 0
+ void EmitStartOfAsmFile(Module &M);
+#endif
+ };
+
+} // end of llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 000000000000..9bca9e070709
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,235 @@
+//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
+ QTM(TM),
+ QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot)
+ || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::JMP);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::JMP_c:
+ NewOpcode = Hexagon::JMP_cNot;
+ break;
+
+ case Hexagon::JMP_cNot:
+ NewOpcode = Hexagon::JMP_c;
+ break;
+
+ case Hexagon::JMP_cdnPt:
+ NewOpcode = Hexagon::JMP_cdnNotPt;
+ break;
+
+ case Hexagon::JMP_cdnNotPt:
+ NewOpcode = Hexagon::JMP_cdnPt;
+ break;
+
+ default:
+ llvm_unreachable("Cannot handle this case");
+ }
+
+ MI->setDesc(QII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = NULL;
+ MachineBasicBlock* JumpAroundTarget = NULL;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+ // The target of the unconditional branch must be JumpAroundTarget.
+ // TODO: If not, we should not invert the unconditional branch.
+ MachineBasicBlock* CondBranchTarget = NULL;
+ if ((MI->getOpcode() == Hexagon::JMP_c) ||
+ (MI->getOpcode() == Hexagon::JMP_cNot)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->removeSuccessor(JumpAroundTarget);
+ MBB->addSuccessor(UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->removeSuccessor(UncondTarget);
+ LayoutSucc->addSuccessor(JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+ // Only move a block if it doesn't have a fall-through; otherwise
+ // the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+ // Correct live-in information. Is used by post-RA scheduler
+ // The live-in to LayoutSucc is now all values live-in to
+ // JumpAroundTarget.
+ //
+ std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+ LayoutSucc->livein_end());
+ std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+ LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+ }
+ for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+ LayoutSucc->addLiveIn(NewLiveIn[i]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 000000000000..bd9608bdb0f4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 000000000000..46c20e9972b4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,207 @@
+//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. Adapted from the machine-independent
+// version of the class (CCState), but this one also handles calls to varargs
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+ const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &c)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
+ LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
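+ // UsedRegs is a bit vector with one bit per register: word Reg/32, bit Reg%32.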
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const uint16_t *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+ unsigned NumArgs = Ins.size();
+ unsigned i = 0;
+
+ // If the function returns a small struct in registers, skip
+ // over the first (dummy) argument.
+ if (SretValueInRegs != 0) {
+ ++i;
+ }
+
+
+ for (; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ // For Hexagon, Return small structures in registers.
+ if (SretValueInRegs != 0) {
+ if (SretValueInRegs <= 32) {
+ unsigned Reg = Hexagon::R0;
+ addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
+ CCValAssign::Full));
+ return;
+ }
+ if (SretValueInRegs <= 64) {
+ unsigned Reg = Hexagon::D0;
+ addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
+ CCValAssign::Full));
+ return;
+ }
+ }
+
+
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ Hexagon_CCAssignFn Fn,
+ int NonVarArgsParams,
+ unsigned SretValueSize) {
+ unsigned NumOps = Outs.size();
+
+ unsigned i = 0;
+ // If the called function returns a small struct in registers, skip
+ // the first actual parameter. We do not want to pass a pointer to
+ // the stack location.
+ if (SretValueSize != 0) {
+ ++i;
+ }
+
+ for (; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+ NonVarArgsParams, i+1, false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+ false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
+ false)) {
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 000000000000..1f601e87ad68
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. Adapted from the target independent
+// version but this handles calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// CCState - This class holds information needed while lowering arguments and
+/// return values. It captures which registers are already assigned and which
+/// stack slots are used. It provides accessors to allocate these values.
+class Hexagon_CCState {
+ CallingConv::ID CallingConv;
+ bool IsVarArg;
+ const TargetMachine &TM;
+ const TargetRegisterInfo &TRI;
+ SmallVector<CCValAssign, 16> &Locs;
+ LLVMContext &Context;
+
+ unsigned StackOffset;
+ SmallVector<uint32_t, 16> UsedRegs;
+public:
+ Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+ void addLoc(const CCValAssign &V) {
+ Locs.push_back(V);
+ }
+
+ LLVMContext &getContext() const { return Context; }
+ const TargetMachine &getTarget() const { return TM; }
+ unsigned getCallingConv() const { return CallingConv; }
+ bool isVarArg() const { return IsVarArg; }
+
+ unsigned getNextStackOffset() const { return StackOffset; }
+
+ /// isAllocated - Return true if the specified register (or an alias) is
+ /// allocated.
+ bool isAllocated(unsigned Reg) const {
+ return UsedRegs[Reg/32] & (1 << (Reg&31));
+ }
+
+ /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+ /// incorporating info about the formals into this state.
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ /// incorporating info about the result values into this state.
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+ /// about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+ unsigned SretValueSize);
+
+ /// AnalyzeCallOperands - Same as above except it takes vectors of types
+ /// and argument flags.
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn);
+
+ /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallResult - Same as above except it's specialized for calls which
+ /// produce a single value.
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+ /// getFirstUnallocated - Return the first unallocated register in the set, or
+ /// NumRegs if they are all allocated.
+ unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (!isAllocated(Regs[i]))
+ return i;
+ return NumRegs;
+ }
+
+ /// AllocateReg - Attempt to allocate one register. If it is not available,
+ /// return zero. Otherwise, return the register, marking it and any aliases
+ /// as allocated.
+ unsigned AllocateReg(unsigned Reg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with extra register to be shadowed.
+ unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateReg - Attempt to allocate one of the specified registers. If none
+ /// are available, return zero. Otherwise, return the first one available,
+ /// marking it and any aliases as allocated.
+ unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc];
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with list of registers to be shadowed.
+ unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateStack - Allocate a chunk of stack space with the specified size
+ /// and alignment.
+ unsigned AllocateStack(unsigned Size, unsigned Align) {
+ assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
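+ // Round StackOffset up to the next multiple of Align (e.g. offset 6 with
+ // align 4 becomes 8), then reserve Size bytes starting at that offset.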
+ StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+ unsigned Result = StackOffset;
+ StackOffset += Size;
+ return Result;
+ }
+
+ // HandleByVal - Allocate a stack slot large enough to pass an argument by
+ // value. The size and alignment information of the argument is encoded in its
+ // parameter attribute.
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+ /// MarkAllocated - Mark a register and all of its aliases as allocated.
+ void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 000000000000..210047446034
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,177 @@
+//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 000000000000..e8a692406e08
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,332 @@
+//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonFrameLowering.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
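+///
+/// For example (illustrative numbers only): with an 8-byte stack alignment,
+/// a 52-byte frame plus a 16-byte maximum call frame becomes
+/// RoundUpToAlignment(52 + 16, 8) = 72 bytes of total stack.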
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
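+ // Illustrative example (hypothetical numbers): a 20000-byte frame fails
+ // the ALLOCFRAME_MAX check above, so we emit allocframe(#0) and then
+ // subtract the full frame size from the stack pointer explicitly.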
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract offset from frame pointer.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+// Returns true if MBB contains a machine instruction that indicates a tail
+// call in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ //
+ // Only insert deallocframe if we need to.
+ //
+ if (hasFP(MF)) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI_end = MBB.end();
+ //
+ // For Hexagon, we don't need the frame size.
+ //
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+ // versions.
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ && !DisableDeallocRet) {
+ // Remove jumpr node.
+ MBB.erase(MBBI);
+ // Add dealloc_return.
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+ .addImm(NumBytes);
+ } else { // Add deallocframe for V2 and V3.
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+ FuncInfo->hasClobberLR() );
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word stores if we spill contiguous callee-saved
+ // registers. For instance, we cannot use double-word stores if we spill
+ // r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
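+ //
+ // For example (illustrative pairing only, assuming the usual Hexagon
+ // convention that the double register D(n) aliases R(2n+1):R(2n)):
+ // spilling r24 and r25 can be merged into a single double-word store of
+ // their shared super-register, whereas r24 followed by r26 cannot.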
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblStore) {
+ TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word loads if we restore contiguous callee-saved
+ // registers. For instance, we cannot use double-word loads if we restore
+ // r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblLoad = false;
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblLoad) {
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 000000000000..ad87f11e2457
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,50 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ const HexagonSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 000000000000..57772a514d55
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,644 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
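+//
+// As an illustration only (a sketch of the intended transformation, not
+// literal compiler output), a countable loop such as
+//     for (i = 0; i < 100; i++) a[i] = b[i] + c;
+// is rewritten so that the preheader sets up the loop count and the
+// back-branch disappears:
+//     loop0(start, #100)       // LOOP0_i placed in the preheader
+//   start:
+//     ... loop body ...
+//     endloop0                 // ENDLOOP0 replaces the loop branch
+//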
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidOperation - Return true if the instruction is not valid within
+ /// a hardware loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+ /// hardware loop. If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// fixupLoopInstrs - Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// convertLoopInstr - Add the instruction to set the LC and SA registers
+ /// explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if the instruction is a hardware loop
+/// instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for creating
+/// the hardware loop phase.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ // Get the loop information.
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // Get the register information.
+ MRI = &MF.getRegInfo();
+ // Get the target-specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToHardwareLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr
+*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return 0; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+
+ // Make sure there is one incoming edge and one backedge, and determine
+ // which is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ //  - The definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) {            // Operand comes from the backedge.
+ // Check if the definition is an induction operation.
+ const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ return MPhi;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
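+///
+/// A typical pattern (a sketch of one accepted shape, not the only one) is:
+///   %iv      = PHI [ %init, %preheader ], [ %iv.next, %latch ]
+///   %iv.next = ADD_ri %iv, #-1
+///   %p       = CMPEQri %iv.next, #0
+/// from which the trip count is either the %init register or a constant
+/// derived from the TFRI that defines %init.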
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+ // Check that the loop has an induction variable.
+ const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+ if (IV_Inst == 0) return 0;
+
+ // Canonical loops will end with a 'cmpeq_ri IV, Imm':
+ //  if Imm is 0, get the count from the PHI operand;
+ //  if Imm is -M, then M is the count;
+ //  otherwise, Imm is the count.
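+ //
+ // Worked example (illustrative values only): an IV that starts at 10, is
+ // decremented by 1 each iteration, and is compared for equality with 0
+ // yields the PHI's initial register as the count; an IV that starts at 0,
+ // is incremented by 1, and is compared with 100 yields an immediate count
+ // of (100 - 0) / 1 = 100.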
+ const MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ const MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI)) {
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be 0");
+ int64_t ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+ if (ImmVal == 0) {
+ // Make sure the induction variable changes by one on each iteration.
+ if (iv_value != 1 && iv_value != -1) {
+ return 0;
+ }
+ return new CountValue(InitialValue->getReg(), iv_value > 0);
+ } else {
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+ if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+ int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+/// add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return (MI->getOpcode() ==
+ Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within
+/// a hardware loop.
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // call is not allowed because the callee may use a hardware loop
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // do not allow nested hardware loops
+ if (isHardwareLoop(MI)) {
+ return true;
+ }
+ // check if the instruction defines a hardware loop register
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
+ MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA1)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the hardware loop function.
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// convertToHardwareLoop - Check if the loop is a candidate for
+/// converting to a hardware loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if it is a counting loop; either a register
+/// value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L);
+ if (TripCount == 0) {
+ return false;
+ }
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's no place for the loop instruction.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate hw loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the 1st iteration, and the loop
+ // latch may contain instructions that need to be executed after the 1st iter.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a hardware loop
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ MachineFunction *MF = LastMBB->getParent();
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+ }
+
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop.
+ int64_t CountImm = TripCount->getImm();
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand to get this mechanism to work; both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc dl = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::JMP_c ||
+ LastI->getOpcode() == Hexagon::JMP_cNot) {
+ // Delete one and change/add an unconditional branch out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end()) {
+ TII->RemoveBranch(*LastMBB);
+ }
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ ++NumHWLoops;
+ return true;
+}
+
+/// createHexagonFixupHwLoops - Factory for creating the hardware loop
+/// phase.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction, then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map from each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ diff = (diff > 0 ? diff : -diff);
+ if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+
+}
+
+/// convertLoopInstr - Convert a loop instruction to a sequence of instructions
+/// that set the LC and SA registers explicitly.
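+///
+/// Illustrative expansion (a sketch of the sequence built below, with a
+/// hypothetical scavenged scratch register Rs): for a LOOP0_i with an
+/// immediate trip count #N and start label "start", we emit roughly
+///   Rs  = #N          ; TFRI
+///   LC0 = Rs          ; TFCR
+///   Rs  = ##start     ; CONST32_Label
+///   SA0 = Rs          ; TFCR
+/// and the caller then erases the original LOOP instruction.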
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9df965efc14b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1485 @@
+//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ const HexagonInstrInfo *TII;
+
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
+ : SelectionDAGISel(targetmachine),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
+ return new HexagonDAGToDAGISel(TM);
+}
+
+static bool IsS11_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_0 predicate - True if the immediate fits in an 11-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_1 predicate - True if the immediate fits in an 11-bit signed field
+ // after a 1-bit right shift (i.e. it is a multiple of 2).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_2 predicate - True if the immediate fits in an 11-bit signed field
+ // after a 2-bit right shift (i.e. it is a multiple of 4).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_3 predicate - True if the immediate fits in an 11-bit signed field
+ // after a 3-bit right shift (i.e. it is a multiple of 8).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_0 predicate - True if the immediate fits in a 6-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_1 predicate - True if the immediate fits in a 6-bit unsigned field
+ // after a 1-bit right shift (i.e. it is a multiple of 2).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_2 predicate - True if the immediate fits in a 6-bit unsigned field
+ // after a 2-bit right shift (i.e. it is a multiple of 4).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return 1;
+ }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_tfrpr:
+ return Hexagon::TFR_RsPd;
+ case Intrinsic::hexagon_C2_and:
+ return Hexagon::AND_pp;
+ case Intrinsic::hexagon_C2_xor:
+ return Hexagon::XOR_pp;
+ case Intrinsic::hexagon_C2_or:
+ return Hexagon::OR_pp;
+ case Intrinsic::hexagon_C2_not:
+ return Hexagon::NOT_p;
+ case Intrinsic::hexagon_C2_any8:
+ return Hexagon::ANY_pp;
+ case Intrinsic::hexagon_C2_all8:
+ return Hexagon::ALL_pp;
+ case Intrinsic::hexagon_C2_vitpack:
+ return Hexagon::VITPACK_pp;
+ case Intrinsic::hexagon_C2_mask:
+ return Hexagon::MASK_p;
+ case Intrinsic::hexagon_C2_mux:
+ return Hexagon::MUX_rr;
+
+ // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxir:
+ return Hexagon::MUX_ri;
+
+ // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxri:
+ return Hexagon::MUX_ir;
+
+ case Intrinsic::hexagon_C2_muxii:
+ return Hexagon::MUX_ii;
+ case Intrinsic::hexagon_C2_vmux:
+ return Hexagon::VMUX_prr64;
+ case Intrinsic::hexagon_S2_valignrb:
+ return Hexagon::VALIGN_rrp;
+ case Intrinsic::hexagon_S2_vsplicerb:
+ return Hexagon::VSPLICE_rrp;
+ }
+}
+
+
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+ if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i8 && isInt<11>(Offset)) {
+ return true;
+ }
+ return false;
+}
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned them into a
+// CONST32.
+//
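+// For illustration only (hypothetical symbol @g, a sketch of the selection
+// below rather than literal output): a word load from "@g + #8" whose
+// address is a CONST32 node is matched as
+//   Rb = CONST32_set(@g)
+//   Rd = LDriw_indexed(Rb, #8)
+// provided the offset passes the OffsetFitsS11 range check.
+//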
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDNode* Const32 = LD->getBasePtr().getNode();
+ unsigned Opcode = 0;
+
+ if (Const32->getOpcode() == HexagonISD::CONST32 &&
+ ISD::isNormalLoad(LD)) {
+ SDValue Base = Const32->getOperand(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+ // Figure out base + offset opcode
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ // Build indexed load.
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other,
+ SDValue(NewBase,0),
+ TargetConstOff,
+ Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(LD, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero ext loads.
+ bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDrid;
+ else
+ Opcode = Hexagon::LDrid;
+ } else if (LoadedVT == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDriw;
+ else
+ Opcode = Hexagon::LDriw;
+ } else if (LoadedVT == MVT::i16) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ else
+ Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ } else if (LoadedVT == MVT::i8) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ else
+ Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ } else
+ assert (0 && "unknown memory type");
+
+ // For zero ext i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::ZEXTLOAD) {
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ }
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::SEXTLOAD) {
+ // Handle sign ext i64 loads.
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+ }
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result;
+ } else {
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectBaseOffsetLoad(LD, dl);
+ }
+
+ return result;
+}
+
+
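+// Select an indexed (post-increment) store. If the offset is a valid
+// auto-increment immediate, a POST_ST* instruction is emitted; otherwise a
+// base+0 ST* is emitted and the address update is done with a separate
+// ADD_ri.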
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = { Value, Base,
+ CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else assert (0 && "unknown memory type");
+
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else assert (0 && "unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+  // Build the split increment (address update) instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+  // Try to lower stores of GlobalAddresses into indexed stores. Custom
+  // lowering for GlobalAddress nodes has already turned them into a
+  // CONST32. Truncating stores are avoided for the moment; the
+  // post-increment store path does the same. There does not appear to be a
+  // reason for this, so a bug will be filed to fix it.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out base + offset opcode
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // build indexed store
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectBaseOffsetStore(ST, dl);
+}
+
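+// Select a 64-bit multiply whose operands are sign-extended 32-bit values
+// (either sign_extend nodes or sign-extending loads) and emit a single
+// MPY64 instruction.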
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+    // An i64 signed multiply.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+        return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+        return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+
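+// Select max/min patterns of the form select(setcc(sext_inreg x, i16), ...)
+// into a SXTH followed by MAXw_rr or MINw_rr.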
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
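+// Select trunc(lshr(mul(sext x, sext y), 32)), i.e. the high word of a
+// 32x32 signed multiply, into a single MPY instruction.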
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // Shift by const 32
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+      // Shifting an i64 signed multiply.
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
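+// Fold a left shift of a constant multiply into a single MPYI_ri:
+//   (x * c) << s        -> x * (c << s)
+//   (0 - (x << c)) << s -> x * -(1 << (c + s))
+// provided the resulting constant fits in a signed 9-bit immediate.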
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If a zero_extend is fed by an intrinsic in the DAG (which means the
+// result of the intrinsic is a predicate), convert the zero_extend into a
+// transfer instruction.
+//
+// The zero_extend -> transfer lowering is done here. Otherwise the
+// zero_extend would be converted into a MUX, because predicate registers
+// are defined as 1 bit in the compiler while the architecture defines them
+// as 8-bit registers. We want to preserve all of the lower 8 bits, not
+// just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+ if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ llvm_unreachable("Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Check for intrinsics that have predicate registers as operand(s)
+// and lower them to the corresponding machine instruction.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+ // We are concerned with only those intrinsics that have predicate registers
+ // as at least one of the operands.
+ if (IntrinsicWithPred) {
+ SmallVector<SDValue, 8> Ops;
+ const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+    // Iterate over all the operands of the intrinsic.
+    // For PredRegs, do the transfer.
+    // For Double/Int Regs, just preserve the value.
+    // For immediates, lower them.
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+ SDNode *Arg = N->getOperand(i).getNode();
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+
+ if (RC == Hexagon::IntRegsRegisterClass ||
+ RC == Hexagon::DoubleRegsRegisterClass) {
+ Ops.push_back(SDValue(Arg, 0));
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ // Do the transfer.
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(Arg, 0));
+ Ops.push_back(SDValue(PdRs,0));
+ } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+        // This is an immediate operand. Lower it here, making sure that we
+        // DO have a ConstantSDNode for the immediate value.
+ int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+ SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ Ops.push_back(SDVal);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+ }
+ EVT ReturnValueVT = N->getValueType(0);
+ SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+ ReturnValueVT,
+ Ops.data(), Ops.size());
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to an XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+      // Create the IntReg = 0 node.
+ SDNode* IntRegTFR =
+ CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32));
+
+ // Pd = IntReg
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(IntRegTFR, 0));
+
+ // not(Pd)
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1,
+ SDValue(Pd, 0));
+
+ // xor(not(Pd))
+ Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+ SDValue(Pd, 0), SDValue(NotPd, 0));
+
+      // We have just built:
+      //     Rs = 0
+      //     Pd = Rs
+      //     Pd = xor(Pd, not(Pd))   (always true)
+
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map an add fed by an asr into an accumulating shift (asr +=).
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::SELECT:
+ return SelectSelect(N);
+
+ case ISD::TRUNCATE:
+ return SelectTruncate(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+
+ if (Addr.getOpcode() != ISD::ADD) {
+ return(SelectADDRriS11_2(Addr, Base, Offset));
+ }
+
+ return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+ SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+
+ return true;
+}
+
+
+// Handle the generic address case. It is accessed from inline asm "=m"
+// constraints, which could have any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+
+ switch (ConstraintCode) {
+ case 'o': // Offsetable.
+ case 'v': // Not offsetable.
+ default: return true;
+ case 'm': // Memory.
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 000000000000..d6da0d0911b9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1496 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
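+// Calling convention for vararg calls: the first NumNamedVarArgParams
+// arguments use the regular Hexagon convention; the remaining (unnamed)
+// arguments are always passed on the stack (4-byte slots for i32, 8-byte
+// slots for i64, and byval aggregates as a block).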
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // NumNamedVarArgParams cannot be zero for a vararg function.
+  assert((NumNamedVarArgParams > 0) &&
+         "NumNamedVarArgParams must be positive for a vararg function");
+
+ if ( (int)ValNo < NumNamedVarArgParams ) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+ // Deal with un-named arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ llvm_unreachable(0);
+}
+
+
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
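+// 32-bit values go in R0-R5; once those are used up, they go in 4-byte
+// aligned stack slots.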
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const uint16_t RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
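+// 64-bit values go in D0, then in D1 or D2; otherwise they go in 8-byte
+// aligned stack slots.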
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const uint16_t RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const uint16_t RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed to by a pointer passed by the
+// caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Function arguments are copied from virtual registers to
+/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+ {
+ const Function* CalleeFn = NULL;
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+ if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
+ {
+ // If a function has zero args and is a vararg function, that's
+ // disallowed so it must be an undeclared function. Do not assume
+ // varargs if the callee is undefined.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+
+ if(isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+ DEBUG(dbgs () << "Eligible for Tail Call\n");
+ } else {
+ DEBUG(dbgs () <<
+ "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+        // The argument is not passed by value. "Arg" is a built-in type. It is
+ // not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+    // Do not flag the preceding copy-to-reg nodes together with the following ones.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonImmediates.td.
+static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // immS4 predicate - True if the immediate fits in a 4-bit sign-extended
+  // field.
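+  // For example, with a word access (ShiftAmount == 2) an offset of 8
+  // gives v == 2 and m == 0, so it is accepted; an offset of 40 gives
+  // v == 10, which is outside the s4 range, so it is rejected.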
+ int64_t v = (int64_t)N->getSExtValue();
+ int64_t m = 0;
+ if (ShiftAmount > 0) {
+ m = v % ShiftAmount;
+ v = v >> ShiftAmount;
+ }
+ return (v <= 7) && (v >= -8) && (m == 0);
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc = false;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
+ int ShiftAmount = VT.getSizeInBits() / 16;
+ if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+
+ return false;
+}
+
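+// Walk the operands of an INLINEASM node; if the asm contains an
+// early-clobber register definition of LR, record that fact in
+// HexagonMachineFunctionInfo (hasClobberLR).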
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo->hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+ // Check it to be lr
+ if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ FuncInfo->setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
+
+//
+// Taken from the XCore backend.
+//
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ // Mark all jump table targets as address taken.
+ const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ MBB->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+ }
+
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ getPointerTy(), TargetJT);
+ SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(2, MVT::i32));
+ SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+ ShiftIndex);
+ SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
+
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ //
+ // For Hexagon, the outgoing memory arguments area should be on top of the
+ // alloca area on the stack i.e., the outgoing memory arguments should be
+ // at a lower address than the alloca area. Move the alloca area down the
+ // stack by adding back the space reserved for outgoing arguments to SP
+ // here.
+ //
+ // We do not know what the size of the outgoing args is at this point.
+ // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+ // stack pointer. We patch this instruction with the correct, known
+ // offset in emitPrologue().
+ //
+ // Use a placeholder immediate (zero) for now. This will be patched up
+ // by emitPrologue().
+ SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+ MVT::i32,
+ Sub,
+ DAG.getConstant(0, MVT::i32));
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+ TM.getRegisterInfo()->getStackRegister(),
+ Sub);
+
+ SDValue Ops[2] = { ArgAdjust, CopyChain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // For LLVM, when returning a struct by value (> 8 bytes), the first
+  // argument is a pointer that points to the location on the caller's
+  // stack where the return value will be stored. For Hexagon, the location
+  // on the caller's stack is passed only when the struct size is smaller
+  // than (or equal to) 8 bytes. If not, no address is passed into the
+  // callee and the callee returns the result directly through R0/R1.
+
+ SmallVector<SDValue, 4> MemOps;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+      // Arguments passed in registers:
+      // 1. int, long long, ptr args that get allocated in registers.
+      // 2. Large structs that get a register to hold their address.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+ MemOps.size());
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
+
+SDValue
+HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDNode* OpNode = Op.getNode();
+
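+  // Split SELECT_CC into an explicit SETCC that produces an i1 condition,
+  // followed by a plain SELECT on that condition.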
+ SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
+ Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0),
+ Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
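+    // For a non-zero depth, compute the frame address at that depth and
+    // load the return address saved at frame address + 4.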
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ TRI->getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+
+SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
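+  // Globals placed in the small data section are materialized with
+  // CONST32_GP (GP-relative); everything else uses an absolute CONST32.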
+ HexagonTargetObjectFile &TLOF =
+ (HexagonTargetObjectFile&)getObjFileLowering();
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ }
+
+ return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+ &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+ addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+ addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+ computeRegisterProperties();
+
+ // Align loop entry
+ setPrefLoopAlignment(4);
+
+ // Limits for inline expansion of memcpy/memmove
+ maxStoresPerMemcpy = 6;
+ maxStoresPerMemmove = 6;
+
+ //
+ // Library calls for unsupported operations
+ //
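+  // Each libcall registered below is paired with marking the corresponding
+  // operation (or condition code) Expand, so legalization lowers it to the
+  // named __hexagon_* runtime routine.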
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+  setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+  setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+  setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
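+  // Hexagon supports post-increment addressing for integer loads and stores
+  // of all widths (selected via getPostIndexedAddressParts).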
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // Hexagon has an i1 sign-extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ // Truncate action?
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+  // Hexagon doesn't have sext_inreg; replace it with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+  // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+ if (TM.getSubtargetImpl()->isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
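+  // Minimum function alignment is specified in log2(bytes), so this
+  // requests 4-byte alignment for functions.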
+ setMinFunctionAlignment(2);
+
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+ setStackPointerRegisterToSaveRestore(StackRegister);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+
+const char*
+HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ }
+}
+
+bool
+HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple()) {
+ return false;
+ }
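+  // Truncating i64 to i32 simply uses the low 32-bit subregister of the
+  // 64-bit register pair, so it is free.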
+ return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple()) {
+ return false;
+ }
+ return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Hexagon.");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ADJDYNALLOC: {
+ MachineFunction *MF = BB->getParent();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default: llvm_unreachable("Unexpected instr type to insert");
+ } // switch
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+ std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+ case MVT::i64:
+ return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+ }
+ default:
+ llvm_unreachable("Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+  // Allows a sign-extended 11-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ return false;
+ }
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV) {
+ return false;
+ }
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
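+  // The range below corresponds to a signed 9-bit (s9) immediate field.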
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+ && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+  // In addition to the cases above, we also disable tail call optimization
+  // if the calling convention requires that at least one outgoing argument
+  // needs to go on the stack. We cannot check that here because at this
+  // point that information is not available.
+ return true;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 000000000000..4208bcb2fdca
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "Hexagon.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+    /// isLegalICmpImmediate - Return true if the specified immediate is a
+    /// legal icmp immediate, that is, the target has icmp instructions which
+    /// can compare a register against the immediate without having to
+    /// materialize the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 000000000000..e78bb790ae7d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,508 @@
+//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+ let PrintMethod = "printImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def nOneImm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printNOneImmOperand";
+}
+
+//
+// Immediate predicates
+//
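+// Naming convention: sN/uN predicates accept signed/unsigned N-bit values;
+// an _S suffix (e.g. s11_2) additionally requires the value to be a multiple
+// of 2^S, i.e. an N-bit immediate scaled by 2^S.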
+def s32ImmPred : PatLeaf<(i32 imm), [{
+  // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+  // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+  // s11_0ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+  // s11_1ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+  // s11_2ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+  // s11_3ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+  // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign
+  // extended field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+  // u64ImmPred predicate - Always true; any 64-bit immediate is accepted.
+  // Adding "N ||" to suppress the gcc unused-variable warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+  // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+  // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
+ // extended s8 field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+  // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+  // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+  // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 1-bit aligned - a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+  // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 2-bit aligned - a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+  // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 3-bit aligned - a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+ // m6ImmPred predicate - True if the immediate is negative and fits in
+ // a 6-bit negative number.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+// InN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+  // n8ImmPred predicate - True if the immediate is a negative value in
+  // [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 000000000000..48f0f01bb4cf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,308 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Flags +
+//
+// *** Must match HexagonBaseInfo.h ***
+//===----------------------------------------------------------------------===//
+
+class Type<bits<5> t> {
+ bits<5> Value = t;
+}
+def TypePSEUDO : Type<0>;
+def TypeALU32 : Type<1>;
+def TypeCR : Type<2>;
+def TypeJR : Type<3>;
+def TypeJ : Type<4>;
+def TypeLD : Type<5>;
+def TypeST : Type<6>;
+def TypeSYSTEM : Type<7>;
+def TypeXTYPE : Type<8>;
+def TypeMARKER : Type<31>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Declaration +
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, Type type> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "Hexagon";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+
+ // *** Must match HexagonBaseInfo.h ***
+ Type HexagonType = type;
+ let TSFlags{4-0} = HexagonType.Value;
+ bits<1> isHexagonSolo = 0;
+ let TSFlags{5} = isHexagonSolo;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+
+ // *** The code above must match HexagonBaseInfo.h ***
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions +
+//===----------------------------------------------------------------------===//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayLoad = 1;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+ let mayLoad = 1;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayStore = 1;
+}
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only
+// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+ let mayStore = 1;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
+ bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
+ bits<5> rs;
+ bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
+ bits<5> rs;
+ bits<10> imm10;
+}
+
+class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions -
+//===----------------------------------------------------------------------===//
+
+
+//
+// ALU32 patterns
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+ let mayStore = 1;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+ let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 000000000000..49741a3d1b20
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,67 @@
+//==- HexagonInstrFormatsV4.td - Hexagon V4 Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP : Type<9>;
+def TypeNV : Type<10>;
+def TypePREFIX : Type<30>;
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// Post increment ST Instruction.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<6> imm6;
+}
+
+class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
+ let isCodeGenOnly = 1;
+
+ bits<26> imm26;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 000000000000..8685ec192c7e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,2732 @@
+//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/MathExtras.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+#include "HexagonGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
+
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+
+
+ switch (MI->getOpcode()) {
+ case Hexagon::LDriw:
+ case Hexagon::LDrid:
+ case Hexagon::LDrih:
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw:
+ case Hexagon::STrid:
+ case Hexagon::STrih:
+ case Hexagon::STrib:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
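+/// InsertBranch - Insert branch code at the end of MBB: an unconditional JMP,
+/// a conditional JMP_c/JMP_cNot, or both when a false destination (FBB) is
+/// given.  Returns the number of instructions inserted.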
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const{
+
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ int regPos = 0;
+ // Check if ReverseBranchCondition has asked to reverse this branch.
+ // If we want to reverse the branch an odd number of times, we want
+ // JMP_cNot.
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ BccOpc = Hexagon::JMP_cNot;
+ regPos = 1;
+ }
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+ false)) {
+ MachineBasicBlock *NextBB =
+ llvm::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, 0, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
+
+
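+/// AnalyzeBranch - Analyze the terminators of MBB and fill in TBB, FBB and
+/// Cond.  Returns true if the branching code could not be understood (for
+/// example, blocks containing EH labels or unrecognized terminators).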
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ FBB = NULL;
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+
+ // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+ // It has two successors but does not end with a terminator;
+ // we don't know how to handle it.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return true;
+ } while (I != MBB.begin());
+
+ I = MBB.end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_c) {
+ // Block ends with fall-through true condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_cNot) {
+ // Block ends with fall-through false condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Hexagon::BRCOND and Hexagon::JMP, handle it.
+ if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+ (SecondLastInst->getOpcode() == Hexagon::JMP_c)) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with Hexagon::JMP_cNot and Hexagon::JMP, handle it.
+ if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Hexagon::JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+ int BccOpcNot = Hexagon::JMP_cNot;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+ I->getOpcode() != BccOpcNot)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+ if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ return;
+ }
+
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (RC == Hexagon::IntRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Can't store this register to stack slot");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ // Hexagon_TODO: Implement.
+ return 0;
+}
+
+
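+/// createVR - Create a new virtual register of the register class matching
+/// VT: predicate registers for i1, integer registers for i32 and double
+/// registers for i64.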
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = Hexagon::PredRegsRegisterClass;
+ } else if (VT == MVT::i32) {
+ TRC = Hexagon::IntRegsRegisterClass;
+ } else if (VT == MVT::i64) {
+ TRC = Hexagon::DoubleRegsRegisterClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+ return NewReg;
+}
+
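+/// isExtendable - Return true if the instruction may take a constant-extender
+/// prefix to widen its immediate operand (the new-value jumps and TFR_FI
+/// listed below).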
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return true;
+
+ // TFR_FI
+ case Hexagon::TFR_FI:
+ return true;
+
+
+ default:
+ return false;
+ }
+ return false;
+}
+
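+/// isExtended - Return true if the instruction is already in its
+/// immediate-extended form (the _ie_ jump variants, absolute-set and
+/// global-address memory accesses, and TFR_FI_immext_V4).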
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+
+ // V4 absolute set addressing.
+ case Hexagon::LDrid_abs_setimm_V4:
+ case Hexagon::LDriw_abs_setimm_V4:
+ case Hexagon::LDrih_abs_setimm_V4:
+ case Hexagon::LDrib_abs_setimm_V4:
+ case Hexagon::LDriuh_abs_setimm_V4:
+ case Hexagon::LDriub_abs_setimm_V4:
+
+ case Hexagon::STrid_abs_setimm_V4:
+ case Hexagon::STrib_abs_setimm_V4:
+ case Hexagon::STrih_abs_setimm_V4:
+ case Hexagon::STriw_abs_setimm_V4:
+
+ // V4 global address load.
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+
+ // V4 global address store.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+
+ // V4 predicated global address new value store.
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ case Hexagon::STb_GP_cdnPt_nv_V4 :
+ case Hexagon::STb_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ case Hexagon::STh_GP_cdnPt_nv_V4 :
+ case Hexagon::STh_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ case Hexagon::STw_GP_cdnPt_nv_V4 :
+ case Hexagon::STw_GP_cdnNotPt_nv_V4 :
+
+ // TFR_FI
+ case Hexagon::TFR_FI_immext_V4:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
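+/// isNewValueJump - Return true if this is a V4 new-value compare-and-jump,
+/// i.e. a jump that compares a register produced earlier in the same packet.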
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
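+/// getImmExtForm - Return the immediate-extended (_ie_) opcode corresponding
+/// to the given new-value jump or TFR_FI opcode.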
+unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of instruction");
+
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriPt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriPt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
+
+ case Hexagon::TFR_FI:
+ return Hexagon::TFR_FI_immext_V4;
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ llvm_unreachable("Needs implementing");
+ }
+}
+
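+/// getNormalBranchForm - Map an immediate-extended (_ie_) new-value jump back
+/// to its non-extended form.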
+unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of jump instruction");
+
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ }
+}
+
+
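+/// isNewValueStore - Return true if this is a V4 new-value store, i.e. a
+/// store whose source register is produced earlier in the same packet.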
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+
+ // Store Byte
+ case Hexagon::STrib_nv_V4:
+ case Hexagon::STrib_indexed_nv_V4:
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ case Hexagon::STrib_shl_nv_V4:
+ case Hexagon::STrib_GP_nv_V4:
+ case Hexagon::STb_GP_nv_V4:
+ case Hexagon::POST_STbri_nv_V4:
+ case Hexagon::STrib_cPt_nv_V4:
+ case Hexagon::STrib_cdnPt_nv_V4:
+ case Hexagon::STrib_cNotPt_nv_V4:
+ case Hexagon::STrib_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STbri_cPt_nv_V4:
+ case Hexagon::POST_STbri_cdnPt_nv_V4:
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_abs_nv_V4:
+ case Hexagon::STrib_abs_cPt_nv_V4:
+ case Hexagon::STrib_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_nv_V4:
+ case Hexagon::STrib_imm_abs_cPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Halfword
+ case Hexagon::STrih_nv_V4:
+ case Hexagon::STrih_indexed_nv_V4:
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ case Hexagon::STrih_shl_nv_V4:
+ case Hexagon::STrih_GP_nv_V4:
+ case Hexagon::STh_GP_nv_V4:
+ case Hexagon::POST_SThri_nv_V4:
+ case Hexagon::STrih_cPt_nv_V4:
+ case Hexagon::STrih_cdnPt_nv_V4:
+ case Hexagon::STrih_cNotPt_nv_V4:
+ case Hexagon::STrih_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_SThri_cPt_nv_V4:
+ case Hexagon::POST_SThri_cdnPt_nv_V4:
+ case Hexagon::POST_SThri_cNotPt_nv_V4:
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_abs_nv_V4:
+ case Hexagon::STrih_abs_cPt_nv_V4:
+ case Hexagon::STrih_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_nv_V4:
+ case Hexagon::STrih_imm_abs_cPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Word
+ case Hexagon::STriw_nv_V4:
+ case Hexagon::STriw_indexed_nv_V4:
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ case Hexagon::STriw_shl_nv_V4:
+ case Hexagon::STriw_GP_nv_V4:
+ case Hexagon::STw_GP_nv_V4:
+ case Hexagon::POST_STwri_nv_V4:
+ case Hexagon::STriw_cPt_nv_V4:
+ case Hexagon::STriw_cdnPt_nv_V4:
+ case Hexagon::STriw_cNotPt_nv_V4:
+ case Hexagon::STriw_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STwri_cPt_nv_V4:
+ case Hexagon::POST_STwri_cdnPt_nv_V4:
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_abs_nv_V4:
+ case Hexagon::STriw_abs_cPt_nv_V4:
+ case Hexagon::STriw_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_abs_cdnNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_nv_V4:
+ case Hexagon::STriw_imm_abs_cPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
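+/// isPostIncrement - Return true for loads and stores that use post-increment
+/// (auto-increment) addressing.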
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ switch (MI->getOpcode())
+ {
+ // Load Byte
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDrib_cPt:
+ case Hexagon::POST_LDrib_cNotPt:
+ case Hexagon::POST_LDrib_cdnPt_V4:
+ case Hexagon::POST_LDrib_cdnNotPt_V4:
+
+ // Load unsigned byte
+ case Hexagon::POST_LDriub:
+ case Hexagon::POST_LDriub_cPt:
+ case Hexagon::POST_LDriub_cNotPt:
+ case Hexagon::POST_LDriub_cdnPt_V4:
+ case Hexagon::POST_LDriub_cdnNotPt_V4:
+
+ // Load halfword
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDrih_cPt:
+ case Hexagon::POST_LDrih_cNotPt:
+ case Hexagon::POST_LDrih_cdnPt_V4:
+ case Hexagon::POST_LDrih_cdnNotPt_V4:
+
+ // Load unsigned halfword
+ case Hexagon::POST_LDriuh:
+ case Hexagon::POST_LDriuh_cPt:
+ case Hexagon::POST_LDriuh_cNotPt:
+ case Hexagon::POST_LDriuh_cdnPt_V4:
+ case Hexagon::POST_LDriuh_cdnNotPt_V4:
+
+ // Load word
+ case Hexagon::POST_LDriw:
+ case Hexagon::POST_LDriw_cPt:
+ case Hexagon::POST_LDriw_cNotPt:
+ case Hexagon::POST_LDriw_cdnPt_V4:
+ case Hexagon::POST_LDriw_cdnNotPt_V4:
+
+ // Load double word
+ case Hexagon::POST_LDrid:
+ case Hexagon::POST_LDrid_cPt:
+ case Hexagon::POST_LDrid_cNotPt:
+ case Hexagon::POST_LDrid_cdnPt_V4:
+ case Hexagon::POST_LDrid_cdnNotPt_V4:
+
+ // Store byte
+ case Hexagon::POST_STbri:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+
+ // Store halfword
+ case Hexagon::POST_SThri:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+
+ // Store word
+ case Hexagon::POST_STwri:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+
+ // Store double word
+ case Hexagon::POST_STdri:
+ case Hexagon::POST_STdri_cPt:
+ case Hexagon::POST_STdri_cNotPt:
+ case Hexagon::POST_STdri_cdnPt_V4:
+ case Hexagon::POST_STdri_cdnNotPt_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
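+/// isSaveCalleeSavedRegsCall - Return true if this is the special call used
+/// to save the callee-saved registers (SAVE_REGISTERS_CALL_V4).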
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+}
+
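+/// isPredicable - Return true if the instruction can be predicated; for
+/// instructions with immediates this also checks that the immediate fits the
+/// narrower range available in the predicated encoding.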
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+
+ switch(Opc) {
+ case Hexagon::TFRI:
+ return isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_nv_V4:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_nv_V4:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_nv_V4:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::LDrih_indexed:
+ case Hexagon::LDriuh_indexed:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDrib_indexed:
+ case Hexagon::LDriub_indexed:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::POST_LDrid:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDriw:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDriuh:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDriub:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::STrib_imm_V4:
+ case Hexagon::STrih_imm_V4:
+ case Hexagon::STriw_imm_V4:
+ return (isUInt<6>(MI->getOperand(1).getImm()) &&
+ isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::ADD_ri:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::ASLH:
+ case Hexagon::ASRH:
+ case Hexagon::SXTB:
+ case Hexagon::SXTH:
+ case Hexagon::ZXTB:
+ case Hexagon::ZXTH:
+ return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ case Hexagon::JMPR:
+ return false;
+ }
+
+ return true;
+}
+
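+/// getInvertedPredicatedOpcode - Return the opcode with the opposite
+/// predicate sense (cPt &lt;-&gt; cNotPt) for the given predicated opcode.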
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ switch(Opc) {
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cNotPt;
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cNotPt;
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cNot;
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_c;
+
+ case Hexagon::ADD_ri_cPt:
+ return Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_ri_cNotPt:
+ return Hexagon::ADD_ri_cPt;
+
+ case Hexagon::ADD_rr_cPt:
+ return Hexagon::ADD_rr_cNotPt;
+ case Hexagon::ADD_rr_cNotPt:
+ return Hexagon::ADD_rr_cPt;
+
+ case Hexagon::XOR_rr_cPt:
+ return Hexagon::XOR_rr_cNotPt;
+ case Hexagon::XOR_rr_cNotPt:
+ return Hexagon::XOR_rr_cPt;
+
+ case Hexagon::AND_rr_cPt:
+ return Hexagon::AND_rr_cNotPt;
+ case Hexagon::AND_rr_cNotPt:
+ return Hexagon::AND_rr_cPt;
+
+ case Hexagon::OR_rr_cPt:
+ return Hexagon::OR_rr_cNotPt;
+ case Hexagon::OR_rr_cNotPt:
+ return Hexagon::OR_rr_cPt;
+
+ case Hexagon::SUB_rr_cPt:
+ return Hexagon::SUB_rr_cNotPt;
+ case Hexagon::SUB_rr_cNotPt:
+ return Hexagon::SUB_rr_cPt;
+
+ case Hexagon::COMBINE_rr_cPt:
+ return Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::COMBINE_rr_cNotPt:
+ return Hexagon::COMBINE_rr_cPt;
+
+ case Hexagon::ASLH_cPt_V4:
+ return Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASLH_cNotPt_V4:
+ return Hexagon::ASLH_cPt_V4;
+
+ case Hexagon::ASRH_cPt_V4:
+ return Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::ASRH_cNotPt_V4:
+ return Hexagon::ASRH_cPt_V4;
+
+ case Hexagon::SXTB_cPt_V4:
+ return Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTB_cNotPt_V4:
+ return Hexagon::SXTB_cPt_V4;
+
+ case Hexagon::SXTH_cPt_V4:
+ return Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::SXTH_cNotPt_V4:
+ return Hexagon::SXTH_cPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4:
+ return Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4:
+ return Hexagon::ZXTB_cPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4:
+ return Hexagon::ZXTH_cNotPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4:
+ return Hexagon::ZXTH_cPt_V4;
+
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cNotPt;
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_cPt_V4:
+ return Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_cNotPt_V4:
+ return Hexagon::LDrid_indexed_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4:
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_cPt_V4:
+ return Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_cNotPt_V4:
+ return Hexagon::LDrib_indexed_cPt_V4;
+
+ case Hexagon::LDriub_indexed_cPt_V4:
+ return Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_cNotPt_V4:
+ return Hexagon::LDriub_indexed_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4:
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4:
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_cPt_V4:
+ return Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_cNotPt_V4:
+ return Hexagon::LDrih_indexed_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_cPt_V4:
+ return Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_cNotPt_V4:
+ return Hexagon::LDriuh_indexed_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4:
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_cPt_V4:
+ return Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_cNotPt_V4:
+ return Hexagon::LDriw_indexed_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4:
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ // Byte.
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cNotPt;
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_imm_cPt_V4:
+ return Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_imm_cNotPt_V4:
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ // Halfword.
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cNotPt;
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_imm_cPt_V4:
+ return Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_imm_cNotPt_V4:
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ // Word.
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cNotPt;
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4:
+ return Hexagon::STriw_imm_cNotPt_V4;
+ case Hexagon::STriw_imm_cNotPt_V4:
+ return Hexagon::STriw_imm_cPt_V4;
+
+ // Double word.
+ case Hexagon::POST_STdri_cPt:
+ return Hexagon::POST_STdri_cNotPt;
+ case Hexagon::POST_STdri_cNotPt:
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::STrid_cPt:
+ return Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_cNotPt:
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_indexed_cPt:
+ return Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_cNotPt:
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4:
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+ case Hexagon::STrid_indexed_shl_cNotPt_V4:
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ // Load.
+ case Hexagon::LDrid_cPt:
+ return Hexagon::LDrid_cNotPt;
+ case Hexagon::LDrid_cNotPt:
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDriw_cPt:
+ return Hexagon::LDriw_cNotPt;
+ case Hexagon::LDriw_cNotPt:
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDrih_cPt:
+ return Hexagon::LDrih_cNotPt;
+ case Hexagon::LDrih_cNotPt:
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDriuh_cPt:
+ return Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDriuh_cNotPt:
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDrib_cPt:
+ return Hexagon::LDrib_cNotPt;
+ case Hexagon::LDrib_cNotPt:
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDriub_cPt:
+ return Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriub_cNotPt:
+ return Hexagon::LDriub_cPt;
+
+ // Load Indexed.
+ case Hexagon::LDrid_indexed_cPt:
+ return Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDrid_indexed_cNotPt:
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cPt:
+ return Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDriw_indexed_cNotPt:
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cPt:
+ return Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDrih_indexed_cNotPt:
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cPt:
+ return Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed_cNotPt:
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cPt:
+ return Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDrib_indexed_cNotPt:
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cPt:
+ return Hexagon::LDriub_indexed_cNotPt;
+ case Hexagon::LDriub_indexed_cNotPt:
+ return Hexagon::LDriub_indexed_cPt;
+
+ // Post Inc Load.
+ case Hexagon::POST_LDrid_cPt:
+ return Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDrid_cNotPt:
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDriw_cPt:
+ return Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDriw_cNotPt:
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDrih_cPt:
+ return Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDrih_cNotPt:
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDriuh_cPt:
+ return Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDriuh_cNotPt:
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDrib_cPt:
+ return Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDrib_cNotPt:
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDriub_cPt:
+ return Hexagon::POST_LDriub_cNotPt;
+ case Hexagon::POST_LDriub_cNotPt:
+ return Hexagon::POST_LDriub_cPt;
+
+ // Dealloc_return.
+ case Hexagon::DEALLOC_RET_cPt_V4:
+ return Hexagon::DEALLOC_RET_cNotPt_V4;
+ case Hexagon::DEALLOC_RET_cNotPt_V4:
+ return Hexagon::DEALLOC_RET_cPt_V4;
+
+ // New Value Jump.
+ // JMPEQ_ri - with -1.
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+
+ // JMPEQ_ri.
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+
+ // JMPEQ_rr.
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+
+ // JMPGT_ri - with -1.
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+
+ // JMPGT_ri.
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+
+ // JMPGT_rr.
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+
+ // JMPGT_rrdn.
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+
+ // JMPGTU_ri.
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+
+ // JMPGTU_rr.
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+
+ // JMPGTU_rrdn.
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+
+ default:
+ llvm_unreachable("Unexpected predicated instruction");
+ }
+}
+
+
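+/// getMatchingCondBranchOpcode - Return the predicated form of Opc, using the
+/// true-sense (cPt) variant, or the false-sense (cNotPt) variant when
+/// invertPredicate is set.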
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+ switch(Opc) {
+ case Hexagon::TFR:
+ return !invertPredicate ? Hexagon::TFR_cPt :
+ Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI:
+ return !invertPredicate ? Hexagon::TFRI_cPt :
+ Hexagon::TFRI_cNotPt;
+ case Hexagon::JMP:
+ return !invertPredicate ? Hexagon::JMP_c :
+ Hexagon::JMP_cNot;
+ case Hexagon::ADD_ri:
+ return !invertPredicate ? Hexagon::ADD_ri_cPt :
+ Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_rr:
+ return !invertPredicate ? Hexagon::ADD_rr_cPt :
+ Hexagon::ADD_rr_cNotPt;
+ case Hexagon::XOR_rr:
+ return !invertPredicate ? Hexagon::XOR_rr_cPt :
+ Hexagon::XOR_rr_cNotPt;
+ case Hexagon::AND_rr:
+ return !invertPredicate ? Hexagon::AND_rr_cPt :
+ Hexagon::AND_rr_cNotPt;
+ case Hexagon::OR_rr:
+ return !invertPredicate ? Hexagon::OR_rr_cPt :
+ Hexagon::OR_rr_cNotPt;
+ case Hexagon::SUB_rr:
+ return !invertPredicate ? Hexagon::SUB_rr_cPt :
+ Hexagon::SUB_rr_cNotPt;
+ case Hexagon::COMBINE_rr:
+ return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+ Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::ASLH:
+ return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+ Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASRH:
+ return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+ Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::SXTB:
+ return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+ Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTH:
+ return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+ Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::ZXTB:
+ return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+ Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTH:
+ return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+ Hexagon::ZXTH_cNotPt_V4;
+
+ case Hexagon::JMPR:
+ return !invertPredicate ? Hexagon::JMPR_cPt :
+ Hexagon::JMPR_cNotPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+ Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+ Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+ Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+ Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+ Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+ Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
+ Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+ Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ // Byte.
+ case Hexagon::POST_STbri:
+ return !invertPredicate ? Hexagon::POST_STbri_cPt :
+ Hexagon::POST_STbri_cNotPt;
+ case Hexagon::STrib:
+ return !invertPredicate ? Hexagon::STrib_cPt :
+ Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_indexed:
+ return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+ Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_imm_V4:
+ return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+ Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+ Hexagon::STrib_indexed_shl_cNotPt_V4;
+ // Halfword.
+ case Hexagon::POST_SThri:
+ return !invertPredicate ? Hexagon::POST_SThri_cPt :
+ Hexagon::POST_SThri_cNotPt;
+ case Hexagon::STrih:
+ return !invertPredicate ? Hexagon::STrih_cPt :
+ Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_indexed:
+ return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+ Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_imm_V4:
+ return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+ Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+ Hexagon::STrih_indexed_shl_cNotPt_V4;
+ // Word.
+ case Hexagon::POST_STwri:
+ return !invertPredicate ? Hexagon::POST_STwri_cPt :
+ Hexagon::POST_STwri_cNotPt;
+ case Hexagon::STriw:
+ return !invertPredicate ? Hexagon::STriw_cPt :
+ Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_indexed:
+ return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+ Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+ Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_imm_V4:
+ return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+ Hexagon::STriw_imm_cNotPt_V4;
+ // Double word.
+ case Hexagon::POST_STdri:
+ return !invertPredicate ? Hexagon::POST_STdri_cPt :
+ Hexagon::POST_STdri_cNotPt;
+ case Hexagon::STrid:
+ return !invertPredicate ? Hexagon::STrid_cPt :
+ Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_indexed:
+ return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+ Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+ Hexagon::STrid_indexed_shl_cNotPt_V4;
+ // Load.
+ case Hexagon::LDrid:
+ return !invertPredicate ? Hexagon::LDrid_cPt :
+ Hexagon::LDrid_cNotPt;
+ case Hexagon::LDriw:
+ return !invertPredicate ? Hexagon::LDriw_cPt :
+ Hexagon::LDriw_cNotPt;
+ case Hexagon::LDrih:
+ return !invertPredicate ? Hexagon::LDrih_cPt :
+ Hexagon::LDrih_cNotPt;
+ case Hexagon::LDriuh:
+ return !invertPredicate ? Hexagon::LDriuh_cPt :
+ Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDrib:
+ return !invertPredicate ? Hexagon::LDrib_cPt :
+ Hexagon::LDrib_cNotPt;
+ case Hexagon::LDriub:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriubit:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ // Load Indexed.
+ case Hexagon::LDrid_indexed:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+ Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDriw_indexed:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+ Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDrih_indexed:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+ Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+ Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDrib_indexed:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+ Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDriub_indexed:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+ Hexagon::LDriub_indexed_cNotPt;
+ // Post Increment Load.
+ case Hexagon::POST_LDrid:
+ return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+ Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw:
+ return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+ Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDrih:
+ return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+ Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDriuh:
+ return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+ Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDrib:
+ return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+ Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDriub:
+ return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+ Hexagon::POST_LDriub_cNotPt;
+ // DEALLOC_RETURN.
+ case Hexagon::DEALLOC_RET_V4:
+ return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+ Hexagon::DEALLOC_RET_cNotPt_V4;
+ }
+ llvm_unreachable("Unexpected predicable instruction");
+}
+
+
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ //
+ // This assumes that the predicate is always the first operand
+ // in the set of inputs.
+ //
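+  // Make room for the predicate operand: duplicate the last operand, shift
+  // the remaining operands one slot to the right until the first explicit
+  // register def (the destination) is reached, then write the predicate
+  // register into the slot that was freed.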
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ int oper;
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
+ break;
+ }
+
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ } else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ } else {
+ llvm_unreachable("Unexpected operand type");
+ }
+ }
+
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
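+// The "predicated" property is encoded as a single bit in the instruction's
+// TSFlags; it is extracted below via HexagonII::PredicatedPos/PredicatedMask.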
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+}
+
+
+bool
+HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+bool
+HexagonInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+//
+// We indicate that we want to reverse the branch by
+// inserting a 0 at the beginning of the Cond vector.
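+// A leading immediate 0 therefore marks an already-reversed condition, so
+// erasing it restores the original sense and reversing twice is a no-op.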
+//
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ Cond.erase(Cond.begin());
+ } else {
+ Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
+ const BranchProbability &Probability) const {
+ return (NumInstrs <= 4);
+}
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::DEALLOC_RET_V4 :
+ case Hexagon::DEALLOC_RET_cPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotPt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+  // This function checks whether "Offset" is in the valid range for the given
+  // "Opcode". If "Offset" is outside that range, an "ADD_ri" is inserted to
+  // compute the final address, so the function assumes that "Offset" is
+  // already correctly aligned.
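+  //
+  // For example (per the cases below), a word access such as LDriw/STriw needs
+  // a 4-byte aligned offset within [Hexagon_MEMW_OFFSET_MIN,
+  // Hexagon_MEMW_OFFSET_MAX], while the V4 word mem-ops only accept a 4-byte
+  // aligned offset in [0, 255].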
+
+ switch(Opcode) {
+
+ case Hexagon::LDriw:
+ case Hexagon::STriw:
+ assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::LDrid:
+ case Hexagon::STrid:
+ assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::STrih:
+ case Hexagon::LDrih_ae:
+ assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::LDrib:
+ case Hexagon::STrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDriubit:
+ case Hexagon::LDrib_ae:
+ case Hexagon::LDriub_ae:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::ADD_ri:
+ case Hexagon::TFR_FI:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return (0 <= Offset && Offset <= 63);
+
+  // LDriw_pred and STriw_pred are pseudo operations, so they have to accept an
+  // offset of any size. A later pass knows how to handle it.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ // INLINEASM is very special.
+ case Hexagon::INLINEASM:
+ return true;
+ }
+
+ llvm_unreachable("No offset range is defined for this opcode. "
+ "Please define it in the above switch statement!");
+}
+
+
+//
+// Check whether the Offset is a valid auto-increment immediate for the
+// given load/store type.
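+//
+// The legal range depends on the access size, and the immediate must also be
+// a multiple of that size: 8 bytes for i64, 4 for i32, 2 for i16; i8 has no
+// alignment requirement.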
+//
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+ llvm_unreachable("Not an auto-inc opc!");
+}
+
+
+bool HexagonInstrInfo::
+isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::
+isConditionalTransfer (const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ return true;
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ default:
+ return false;
+ }
+}
+
+bool HexagonInstrInfo::
+isConditionalLoad (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ return true;
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ default:
+ return false;
+ }
+}
+
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores, as they can't be
+// converted to the .new predicate form.
+//
+// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+// ^ ^
+// / \ (not OK. it will cause new-value store to be
+// / X conditional on p0.new while R2 producer is
+// / \ on p0)
+// / \.
+// p.new store p.old NV store
+// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new]
+// ^ ^
+// \ /
+// \ /
+// \ /
+// p.old store
+// [if (p0)memw(R0+#0)=R2]
+//
+// The above diagram shows the steps involved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
+//
+// The following set of instructions further illustrates the scenario where a
+// conditional new-value store becomes invalid when promoted to the .new
+// predicate form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+// 2) p0 = cmp.eq(r3, #0) }
+//
+// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with the
+// first two instructions: in instr 1, r0 is conditional on the old value of
+// p0, but its use in instr 3 would be conditional on the p0 modified by
+// instr 2, which is not valid for new-value stores.
+bool HexagonInstrInfo::
+isConditionalStore (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cNotPt :
+ return QRI.Subtarget.hasV4TOps();
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return QRI.Subtarget.hasV4TOps();
+
+  // Predicated new-value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+  // from the "Conditional Store" list because a predicated new-value store
+  // would NOT be promoted to a double dot-new store; see the diagram below.
+  // This function returns true for those stores that are predicated but not
+  // yet promoted to predicate dot-new instructions.
+ //
+ // +---------------------+
+ // /-----| if (p0) memw(..)=r0 |---------\~
+ // || +---------------------+ ||
+ // promote || /\ /\ || promote
+ // || /||\ /||\ ||
+ // \||/ demote || \||/
+ // \/ || || \/
+ // +-------------------------+ || +-------------------------+
+ // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+ // +-------------------------+ || +-------------------------+
+ // || || ||
+ // || demote \||/
+ // promote || \/ NOT possible
+ // || || /\~
+ // \||/ || /||\~
+ // \/ || ||
+ // +-----------------------------+
+ // | if (p0.new) memw(..)=r0.new |
+ // +-----------------------------+
+ // Double Dot New Store
+ //
+
+ default:
+ return false;
+
+ }
+ return false;
+}
+
+
+
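+// Build the DFA-based resource model from the subtarget's instruction
+// itineraries; the packetizer queries it to decide which instructions can be
+// bundled into the same VLIW packet.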
+DFAPacketizer *HexagonInstrInfo::
+CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+}
+
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+  // Debug info is never a scheduling boundary. It's necessary to be explicit
+  // due to the special treatment of terminators below; otherwise a dbg_value
+  // followed by a terminator would result in the terminator being considered
+  // a scheduling hazard, which is wrong. The boundary should be the actual
+  // instruction preceding the dbg_value instruction(s), just like it is
+  // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel() || MI->isInlineAsm())
+ return true;
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 000000000000..6a45871b67e2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,185 @@
+//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "HexagonRegisterInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ virtual DFAPacketizer*
+ CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
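+
+  // Hexagon-specific helpers: offset and immediate range checks plus
+  // instruction-classification queries (predication, mem-ops, new-value
+  // forms).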
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+ bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalStore(const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isExtendable(const MachineInstr* MI) const;
+ bool isExtended(const MachineInstr* MI) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ unsigned getImmExtForm(const MachineInstr* MI) const;
+ unsigned getNormalBranchForm(const MachineInstr* MI) const;
+
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 000000000000..fd5adef0f63f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3052 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
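+// Note: the S11_n / U6_n suffixes above are read here as signed 11-bit or
+// unsigned 6-bit offsets presumably scaled by 2^n (i.e. multiples of 2^n);
+// this follows the usual Hexagon immediate naming and is an interpretation,
+// not something the definitions above state explicitly.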
+
+// Address operands.
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+// Address operands
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+ def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
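+// For illustration only: a hypothetical
+//   defm FOO : ALU32_rr_ri<"foo", foonode>;
+// would expand to a FOOrr record taking two register sources and a FOOri
+// record taking an s10 immediate as the first source operand, both selected
+// through the given SDNode.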
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+}
+}
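+// These compare multiclasses are instantiated below (e.g. the CMPEQ, CMPGT,
+// CMPGTU and CMP*64 defm definitions) to produce compares that write a
+// predicate register.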
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+let isPredicable = 1 in
+def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+
+// Logical operations.
+let isPredicable = 1 in
+def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def AND_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Imm:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+
+let isPredicable = 1 in
+def OR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Subtract.
+let isPredicable = 1 in
+def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+
+// Transfer immediate.
+let isReMaterializable = 1, isPredicable = 1 in
+def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ "$dst = #$src1",
+ [(set IntRegs:$dst, s16ImmPred:$src1)]>;
+
+// Transfer register.
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ []>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+ s8ImmPred:$src3))]>;
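+
+// In all of the mux forms above, the result is "$src1 ? $src2 : $src3", which
+// is why each maps to the generic 'select' node with the predicate register
+// as the condition.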
+
+// Shift halfword.
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = aslh($src1)",
+           [(set IntRegs:$dst, (shl IntRegs:$src1, 16))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = asrh($src1)",
+           [(set IntRegs:$dst, (sra IntRegs:$src1, 16))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtb($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxth($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxtb($src1)",
+ []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxth($src1)",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = add($src2, $src3)",
+ []>;
+
+
+// Conditional combine.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+// Conditional logical operations.
+
+let isPredicated = 1 in
+def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = or($src2, $src3)",
+ []>;
+
+
+// Conditional subtract.
+
+let isPredicated = 1 in
+def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+
+// Conditional transfer.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
+ "if ($src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if ($src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if ($src1.new) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1.new) $dst = #$src2",
+ []>;
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Maximum.
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Minimum.
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Subtract.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Subtract halfword.
+
+// Transfer register.
+let neverHasSideEffects = 1 in
+def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VB +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VH +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VW +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
+
+// if (p0) jump
+let isBranch = 1, isTerminator=1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_c : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond PredRegs:$src, bb:$offset)]>;
+}
+
+// if (!p0) jump
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// if (p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:t $offset",
+ []>;
+}
+
+// if (!p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:t $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:nt $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:nt $offset",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
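+/// For example, in POST_LDrid below the post-incremented base ($src1) is the
+/// first input operand.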
+///
+// Load doubleword.
+let isPredicable = 1 in
+def LDrid : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst=memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memd($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load doubleword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memd($addr)",
+ []>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+
+// Load byte.
+let isPredicable = 1 in
+def LDrib : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+
+def LDrib_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+// Indexed load byte.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrib_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memb($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+
+// Load halfword.
+let isPredicable = 1 in
+def LDrih : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrih_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+def LDrih_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+let AddedComplexity = 20 in
+def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>;
+
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+// Load unsigned byte.
+let isPredicable = 1 in
+def LDriub : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1 in
+def LDriubit : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriub_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let AddedComplexity = 20 in
+def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+def LDriub_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+
+let AddedComplexity = 20 in
+def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memub($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+// Load unsigned halfword.
+let isPredicable = 1 in
+def LDriuh : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed load unsigned halfword.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))]>;
+
+def LDriuh_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+
+// Indexed load unsigned halfword any-extend.
+let AddedComplexity = 20 in
+def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memuh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+
+// Load word.
+let isPredicable = 1 in
+def LDriw : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+
+// Load predicate.
+let mayLoad = 1, Defs = [R10,R11] in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Indexed load.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriw_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst=memw($src1+#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memw($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load word conditionally.
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+// Deallocate stack frame.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ "deallocframe",
+ []>;
+}
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set IntRegs:$dst,
+ (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9 - but it works... so far.
+// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
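+// For example, a value of m9 = 3 maps to Rd=+mpyi(Rs,#3), while m9 = -3
+// maps to Rd=-mpyi(Rs,#3).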
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+ (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2))))]>;
+
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+///    the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always the
+///    last operand.
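+///    For example, in STrid below the stored value ($src1) is the last input
+///    operand, and in POST_STdri the updated base ($dst) is operand zero.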
+///
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store double word.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store DoubleRegs:$src3,
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memd($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1) memd($addr) = $src2",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1) memd($addr) = $src2",
+ []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+
+// Store byte.
+// memb(Rs+#s11:0)=Rt
+let isPredicable = 1 in
+def STrib : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1",
+ [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3",
+ [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
+ s11_0ImmPred:$src2))]>;
+
+// memb(gp+#u16:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>;
+
+// memb(Rx++#s4:0)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ s4Imm:$offset),
+ "memb($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ s4_0ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store byte conditionally.
+// if ([!]Pv) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2",
+ []>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store halfword.
+// memh(Rs+#s11:1)=Rt
+let isPredicable = 1 in
+def STrih : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1",
+ [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3",
+ [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
+ s11_1ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>;
+
+// memh(Rx++#s4:1)=Rt.H
+// memh(Rx++#s4:1)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memh($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ s4_1ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store halfword conditionally.
+// if ([!]Pv) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2",
+ []>;
+
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store word.
+// Store predicate.
+let Defs = [R10,R11] in
+def STriw_pred : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// memw(Rs+#s11:2)=Rt
+let isPredicable = 1 in
+def STriw : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memw($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store word conditionally.
+// if ([!]Pv) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2",
+ []>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+
+// Allocate stack frame.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 // Should almost never emit this",
+ []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 // Should almost never emit!",
+ [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ u6Imm:$src3),
+ "$dst += lsr($src2, #$src3)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ (srl DoubleRegs:$src2,
+ u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Shift by register and accumulate.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "$dst += asr($src2, $src3)",
+ [], "$src1 = $dst">;
+
+// Shift by immediate and add.
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+let hasSideEffects = 1 in
+def BARRIER : STInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
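+// For example, "r0 = mux(p0, r1, r2)" is instead represented as the pair of
+// conditional transfers "if (p0) r0 = r1" and "if (!p0) r0 = r2".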
+let AddedComplexity = 100 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (select PredRegs:$src1, IntRegs:$src2, s12ImmPred:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (select PredRegs:$src1, s12ImmPred:$src2, IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s12ImmPred:$src2,
+ s12ImmPred:$src3))]>;
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set IntRegs:$dst, ADDRri:$src1)]>;
+
+//
+// CR - Type.
+//
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
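+// A short sketch of how these fit together (operands are illustrative only):
+// "loop0(start, #n)" initializes SA0 with the loop start address and LC0 with
+// the trip count n; ":endloop0" at the end of the loop decrements LC0 and
+// branches back to SA0 while LC0 is non-zero, which is why ENDLOOP0 defines
+// PC/LC0 and uses SA0/LC0 above.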
+
+// Support for generating global addresses.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst, imm:$global) ]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set DoubleRegs:$dst, imm:$global) ]>;
+
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set PredRegs:$dst, 0)]>;
+
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst,
+ (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2)))),
+ (i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue, and variadic arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it is, it has to be mapped
+// to a NOP.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_p PredRegs:$src1)>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_cNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
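+// In other words (an explanatory note, not from the original source): the i1
+// is loaded as a byte and masked down to bit 0, so a stored value of 0 or 1
+// ends up zero-extended into a 32-bit register.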
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+ (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+ (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+ (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to prevent emitting pnots as much as possible.
+// Map brcond with an unsupported setcc to a JMP_cNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_cNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+ (JMP_cNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+ (JMP_cNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+ (JMP_c PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+ (JMP_cNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_c (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_cNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support a 64-bit mux, so emulate it with combines.
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (COMBINE_rr
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+ (AND_pp (NOT_p PredRegs:$src1), PredRegs:$src3))>;
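+// A quick check of the identity above: if B1 is true, (B1 & B2) = B2 and
+// (!B1 & B3) = 0, so the OR yields B2; if B1 is false, it yields B3.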
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from i64 to i32.
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+ (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 to i1.
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+ (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
+def : Pat<(truncstorei32 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memb(addr) = i1 -> r0 = 1; memb(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memb(CONST32(#foo)) = i1 -> r0 = 1;
+// memb(#foo) = r0
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine, but that will cost 2 instructions.
+// Is there a better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+ (i64 (SXTW IntRegs:$src1))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_p (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_p(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs, Rt) -> !cmpeq(Rs, Rt).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for Hexagon, since we compute with predicate
+// registers.
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+ (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss, Rtt) -> !cmpeq(Rss, Rtt).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (NOT_p (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let assembler map it.
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rtt) -> cmpgtu(Rtt, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext PredRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+
+// i32 -> i64
+def : Pat <(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (anyext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def : Pat <(i64 (anyext PredRegs:$src1)),
+ (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+
+// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
+def : Pat<(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
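+// Rough sketch of the arithmetic behind the expansion above (a_hi/a_lo and
+// b_hi/b_lo denote the 32-bit halves; this ignores any overflow out of the
+// 64-bit middle accumulation):
+//   a * b = (a_hi*2^32 + a_lo) * (b_hi*2^32 + b_lo)
+//   mulhu(a, b) = a_hi*b_hi + hi32(a_hi*b_lo + a_lo*b_hi + hi32(a_lo*b_lo))
+// which is what the nested MPYU64 / MPYU64_acc / LSRd_ri sequence computes.
+// The signed mulhs expansion below follows the same structure with MPY64_acc.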
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Hexagon specific ISD nodes.
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
+ s16ImmPred:$src2))]>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
+ (TFR IntRegs:$src1)>;
+
+
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT IntRegs:$src)]>;
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 000000000000..2bd6770efd7d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,137 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// Jump to address from register
+// if(p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if(p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instructions.
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 000000000000..f507e4f37c18
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,5746 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def IMMEXT : Immext<(outs), (ins),
+ "##immext //should never emit this",
+ []>,
+ Requires<[HasV4T]>;
+
+// The Hexagon V4 architecture spec defines the following instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (SYSTEM is not implemented in
+// the compiler).
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+// Shift halfword.
+
+let isPredicated = 1 in
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Sign extend.
+
+let isPredicated = 1 in
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicated = 1 in
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Generate frame index addresses.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$offset),
+ "$dst = add($src1, ##$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine
+// Rdd=combine(Rs, #s8)
+let neverHasSideEffects = 1 in
+def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = combine($src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+// Rdd=combine(#s8, Rs)
+let neverHasSideEffects = 1 in
+def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, IntRegs:$src2),
+ "$dst = combine(#$src1, $src2)",
+ []>,
+ Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+//
+// These absolute-set addressing mode instructions accept an immediate as
+// an operand. We have duplicated these patterns to take a global address.
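+// As a hedged illustration of absolute-set addressing (register names and the
+// immediate are only examples): "r0 = memw(r1=#24)" loads a word from address
+// 24 and also writes that effective address (24) into r1.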
+
+let neverHasSideEffects = 1 in
+def LDrid_abs_setimm_V4 : LDInst<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memd($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrib_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memb($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrih_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriub_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memub($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriuh_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memuh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriw_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memw($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// The following patterns are defined for absolute-set addressing mode
+// instructions which take a global address as an operand.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_abs_set_V4 : LDInst<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memd($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memb($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memub($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memw($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Load doubleword.
+//
+// Make sure that in a post-increment load, the first operand is always the
+// post-increment operand.
+//
+// Rdd=memd(Rs+Rt<<#u2)
+// Special-case pattern for an indexed load without an offset, which is easier
+// to match. The AddedComplexity of this pattern should be lower than that of
+// the base+offset load, and lower still than that of the more general version
+// with offset/shift below. A similar approach is taken for all other
+// base+index loads.
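+// Note on the complexity values (an explanatory note, not from the original
+// source): SelectionDAG ISel prefers the matching pattern with the highest
+// complexity, so the shifted forms (AddedComplexity 40/45) win over the plain
+// indexed forms (10/15) whenever a shift is actually present.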
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memd($src1+$src2<<#0)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memd($src1+$src2<<#$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load doubleword conditionally.
+// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
+// if (Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memb($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memb($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load byte conditionally.
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned byte conditionally.
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Rt<<#u2+#U6)
+
+//// Load halfword
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load halfword conditionally.
+// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned halfword conditionally.
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rt<<#u2+#U6)
+
+//// Load word.
+// Load predicate: Fix for bug 5279.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_pred_V4 : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memw($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memw($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load word conditionally.
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-inc Load, Predicated, Dot new
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+/// Load from global offset
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
+ Requires<[HasV4T]>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
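+// For illustration only (a hypothetical module, not part of this patch), such
+// a shrunken boolean global load looks roughly like the following LLVM IR;
+// the extloadi1/sextloadi1/zextloadi1 patterns below select it into
+// LDb_GP_V4 or LDub_GP_V4:
+//
+//   @flag = global i1 true
+//
+//   define i32 @read_flag() {
+//     %v = load i1* @flag        ; i1 load of the shrunken global
+//     %z = zext i1 %v to i32     ; extending use -> matched by zextloadi1
+//     ret i32 %z
+//   }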
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+/// the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always the
+/// last operand.
+///
+
+// memd(Re=#U6)=Rtt
+def STrid_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, u6Imm:$src2),
+ "memd($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=#U6)=Rs
+def STrib_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memb($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=#U6)=Rs
+def STrih_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memh($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=#U6)=Rs
+def STriw_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memw($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(Re=##global)=Rtt
+def STrid_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, globaladdress:$src2),
+ "memd($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=##global)=Rs
+def STrib_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memb($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=##global)=Rs
+def STrih_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memh($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=##global)=Rs
+def STriw_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memw($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+ "memd($src1+$src2<<#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+// if (!Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store byte.
+// memb(Rs+#u6:0)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
+ "memb($src1+#$src2) = #$src3",
+ [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_0ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4",
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Rt
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
+// if (Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
+// if (Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+// if (Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+// if (!Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store halfword.
+// TODO: needs to be implemented
+// memh(Re=#U6)=Rt.H
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+#u6:1)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
+ "memh($src1+#$src2) = #$src3",
+ [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4",
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
+// if (Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// Store predicate:
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_pred_V4 : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
+ "memw($src1+#$src2) = #$src3",
+ [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4",
+ [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+// memw(gp+#u16:2)=Rt
+
+
+// Store word conditionally.
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
+// if (Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
+// if (Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+// if (Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+// if (!Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store to global address.
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(#global)=Rtt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STd_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, DoubleRegs:$src),
+ "memd(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1), (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+// Store new-value byte.
+
+// memb(Re=#U6)=Nt.new
+// memb(Rs+#s11:0)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrib_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
+ "memb($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+
+// memb(gp+#u16:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// Store new-value byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Nt.new
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
+// if (Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword.
+// memh(Re=#U6)=Nt.new
+// memh(Rs+#s11:1)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrih_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
+ "memh($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memh(gp+#u16:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword conditionally.
+
+// if ([!]Pv[.new]) memh(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word.
+
+// memw(Re=#U6)=Nt.new
+// memw(Rs+#s11:2)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STriw_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STriw_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
+ "memw($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+// memw(gp+#u16:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// Store new-value word conditionally.
+
+// if ([!]Pv[.new]) memw(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
+// if (Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+
+// if (Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NV/J +
+//===----------------------------------------------------------------------===//
+
+multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Multiclass for regular dot new of 1st operand register.
+multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for dot new of 2nd operand register.
+multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, including -1.
+multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+ defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">;
+ defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, excluding -1.
+multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for tstbit, where 2nd operand is always #0.
+multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for GT.
+multiclass NVJ_type_rr_ri<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for EQ.
+multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for GTU.
+multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>;
+}
+
+// Multiclass for tstbit.
+multiclass NVJ_type_r0<string OpcStr> {
+ defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>;
+ defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>;
+}
+
+// Base Multiclass for New Value Jump.
+multiclass NVJ_type {
+ defm GT : NVJ_type_rr_ri<"cmp.gt">;
+ defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">;
+ defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">;
+ defm TSTBIT : NVJ_type_r0<"tstbit">;
+}
+
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ defm JMP_ : NVJ_type;
+}
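+// Illustrative sketch (hedged, not part of the ISA definition): the nested
+// defm expansion concatenates the multiclass prefixes, so the cmp.gt /
+// register / taken path above is expected to yield a record named along the
+// lines of JMP_GTrrPt_nv_V4 with the assembly string
+//   "if (cmp.gt($src1.new, $src2)) jump:t $offset"
+// The exact record names depend on TableGen's defm prefixing rules.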
+
+//===----------------------------------------------------------------------===//
+// NV/J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ "$dst = add($src1, add($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ s6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>,
+ Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4, but matches a different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (i32 IntRegs:$src3)))]>,
+ Requires<[HasV4T]>;
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = and($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = or($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst = or($src1, and($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= and($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= or($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add(#$src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add(#$src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
+ u6_2ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add($src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by immediate and logical.
+//Rx=and(#u8,asl(Rx,#U5))
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=and(#u8,lsr(Rx,#U5))
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,asl(Rx,#U5))
+let AddedComplexity = 30 in
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,lsr(Rx,#U5))
+let AddedComplexity = 30 in
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by register.
+//Rd=lsl(#s6,Rt)
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+ "$dst = lsl(#$src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
+ (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+
+//Shift by register and logical.
+//Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+ "memw($base+#$offset) += #$addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+ "memw($base+#$offset) -= #$subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+ "memw($base+#$offset) += $addend",
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+ "memw($base+#$offset) -= $subend",
+ [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+ "memw($base+#$offset) &= $andend",
+ [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+ "memw($base+#$offset) |= $orend",
+ [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memw($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memw($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memw($addr) += $addend",
+ [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memw($addr) -= $subend",
+ [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memw($addr) &= $andend",
+ [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memw($addr) |= $orend",
+ [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+ "memh($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+ "memh($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+ "memh($base+#$offset) += $addend",
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+ "memh($base+#$offset) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+ "memh($base+#$offset) += $andend",
+ [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+ "memh($base+#$offset) |= $orend",
+ [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memh($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memh($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memh($addr) += $addend",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memh($addr) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memh($addr) &= $andend",
+ [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memh($addr) |= $orend",
+ [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+ "memb($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+ "memb($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+ "memb($base+#$offset) += $addend",
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+ "memb($base+#$offset) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+ "memb($base+#$offset) += $andend",
+ [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+ "memb($base+#$offset) |= $orend",
+ [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memb($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memb($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memb($addr) += $addend",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memb($addr) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memb($addr) &= $andend",
+ [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memb($addr) |= $orend",
+ [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. The other flavors (GE/GEU/LT/LTU/LE/LEU) are not supported by
+// the hardware, but the compiler can still synthesize them by combining the
+// implemented EQ/GT/GTU patterns, e.g. by swapping operands or inverting the
+// result of an implemented compare.
+
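A minimal sketch (not part of the vendor patch) of how one of the missing
flavors could be covered using only the cmpb.gt instruction defined below: on
the sign-extended byte values, setlt(a, b) is the same as setgt(b, a), so a
selection pattern of the following shape, mirroring the Pat/HasV4T style used
later in this file, would handle the LT case.

    // Hypothetical pattern: byte-wide signed "less than" selected as
    // cmpb.gt with the operands swapped.  CMPbGTrr_V4 already matches
    // setgt on the values shifted left by 24 bits.
    let Predicates = [HasV4T], AddedComplexity = 30 in
    def : Pat<(i1 (setlt (shl (i32 IntRegs:$src1), (i32 24)),
                         (shl (i32 IntRegs:$src2), (i32 24)))),
              (CMPbGTrr_V4 IntRegs:$src2, IntRegs:$src1)>;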
+// The following instruction is not extended, since extending the immediate
+// produces incorrect code for negative numbers.
+// Pd=cmpb.eq(Rs,#u8)
+
+let isCompare = 1 in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)), 255), 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+/* Incorrect Pattern -- immediate should be right shifted before being
+used in the cmpb.gt instruction.
+// Pd=cmpb.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmpb.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+*/
+
+// Pd=cmpb.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ (and (i32 IntRegs:$src2), 255)))]>,
+ Requires<[HasV4T]>;
+
+// The following instruction is not extended, since extending the immediate
+// produces incorrect code for negative numbers.
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1 in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)),
+ 65535), 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+/* Incorrect Pattern -- immediate should be right shifted before being
+used in the cmph.gt instruction.
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+
+let isCompare = 1 in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+*/
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setugt (and (i32 IntRegs:$src1), 65535),
+ (and (i32 IntRegs:$src2), 65535)))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // Restore_and_dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, isBarrier = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst // Restore_and_dealloc_before_tailcall",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Function call that saves the callee-saved registers.
+let isCall = 1, isBarrier = 1,
+ Uses = [R29, R31] in {
+ def SAVE_REGISTERS_CALL_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst // Save_calle_saved_registers",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+
+// Load/Store with absolute addressing mode
+// memw(#u6)=Rt
+
+multiclass ST_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_nv_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let AddedComplexity = 30, isPredicable = 1 in
+def STrid_abs_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, DoubleRegs:$src),
+ "memd(##$absaddr) = $src",
+ [(store (i64 DoubleRegs:$src), (HexagonCONST32 tglobaladdr:$absaddr))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+defm STrib : ST_abs<"memb">;
+defm STrih : ST_abs<"memh">;
+defm STriw : ST_abs<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+
+multiclass LD_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$absaddr),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1.new) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1.new) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let AddedComplexity = 30 in
+def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$absaddr),
+ "$dst = memd(##$absaddr)",
+ [(set (i64 DoubleRegs:$dst), (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+defm LDrib : LD_abs<"memb">;
+defm LDriub : LD_abs<"memub">;
+defm LDrih : LD_abs<"memh">;
+defm LDriuh : LD_abs<"memuh">;
+defm LDriw : LD_abs<"memw">;
+
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriw_abs_V4 tglobaladdr: $absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrib_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriub_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrih_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+
+// Transfer global address into a register
+let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, Predicates = [HasV4T] in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
+
+
+// Load - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ "$dst=memd($src1<<#$src2+##$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
+ [(set IntRegs:$dst,
+ (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset)))))]>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
+defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
+defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriw_ind : LD_indirect_lo<"memw", load>;
+
+// Store - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def STrid_ind_lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ IntRegs:$src4),
+ !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
+ [(OpNode (i32 IntRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
+defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
+defm STriw_ind : ST_indirect_lo<"memw", store>;
+
+// Store - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
+multiclass ST_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_nv_V4 : STInst<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_imm : ST_absimm<"memb">;
+defm STrih_imm : ST_absimm<"memh">;
+defm STriw_imm : ST_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+
+// Load - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
+
+multiclass LD_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$src)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1.new) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1.new) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_imm : LD_absimm<"memb">;
+defm LDriub_imm : LD_absimm<"memub">;
+defm LDrih_imm : LD_absimm<"memh">;
+defm LDriuh_imm : LD_absimm<"memuh">;
+defm LDriw_imm : LD_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load u6ImmPred:$src)),
+ (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
+ (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
+ (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
+ (LDrih_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
+ (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+
+
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=##globaladdress
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Indexed store halfword - global address.
+// memh(Rs+#u6:1)=##globaladdress
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 000000000000..b15e293fdfb4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
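A minimal illustration (the actual instantiations appear later in this file,
outside this hunk): each of these wrapper classes is meant to be instantiated
with an assembler mnemonic and a Hexagon intrinsic declared in
IntrinsicsHexagon.td, along these lines (the def name here is illustrative).

    // Hypothetical instantiation of qi_ALU32_sisi: map the C2_cmpeq
    // intrinsic onto a "$dst = cmp.eq($src1, $src2)" instruction.
    def Hexagon_C2_cmpeq:
      qi_ALU32_sisi<"cmp.eq", int_hexagon_C2_cmpeq>;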
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+//
+// SInst classes.
+//
+
+class qi_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_didi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_diu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_sidi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_disisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class di_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_diu6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2, u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class di_SInst_dididi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
+ u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+
+//
+// MInst classes.
+//
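+// The suffixes used throughout these wrapper classes mirror the assembler
+// modifiers they print: hh/hl/lh/ll select the high or low 16-bit half of
+// each source (".H"/".L"), _s1 adds ":<<1", _rnd ":rnd", _sat ":sat", and
+// _conj (the conjugate forms) marks the second source with "*".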
+
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+class si_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u4Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+
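+// Note on the wrapper-class names above (a summary inferred from their
+// definitions in this file, not an authoritative Hexagon reference): the
+// leading letters give the destination register class (si = 32-bit IntRegs,
+// di = 64-bit DoubleRegs, qi = predicate), the letters after the instruction
+// type give the source operand types, and the trailing suffixes encode the
+// assembler modifiers appended to the mnemonic: _sat (:sat), _rnd (:rnd),
+// _s1 (:<<1), _conj (conjugate operand, "*"), _hh/_hl/_lh/_ll (halfword
+// selection), and _acc/_nac for the accumulating += / -= forms.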
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Add.
+def Hexagon_A2_add:
+ si_ALU32_sisi <"add", int_hexagon_A2_add>;
+def Hexagon_A2_addi:
+ si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
+
+// ALU32 / ALU / Logical operations.
+def Hexagon_A2_and:
+ si_ALU32_sisi <"and", int_hexagon_A2_and>;
+def Hexagon_A2_andir:
+ si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
+def Hexagon_A2_not:
+ si_ALU32_si <"not", int_hexagon_A2_not>;
+def Hexagon_A2_or:
+ si_ALU32_sisi <"or", int_hexagon_A2_or>;
+def Hexagon_A2_orir:
+ si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
+def Hexagon_A2_xor:
+ si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+def Hexagon_A2_neg:
+ si_ALU32_si <"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+def Hexagon_A2_sub:
+ si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
+def Hexagon_A2_subri:
+ si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def Hexagon_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def Hexagon_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def Hexagon_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def Hexagon_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
+
+// ALU32 / ALU / Transfer Register.
+def Hexagon_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def Hexagon_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def Hexagon_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def Hexagon_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def Hexagon_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def Hexagon_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def Hexagon_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def Hexagon_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def Hexagon_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def Hexagon_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def Hexagon_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def Hexagon_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def Hexagon_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh:
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
+
+// ALU32 / PERM / Sign/zero extend.
+def Hexagon_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def Hexagon_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def Hexagon_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def Hexagon_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def Hexagon_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, it should be lh, so do NOT
+// change the "si_ALU64_sisi_l16_lh" class it inherits from.
+def Hexagon_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, it should be lh, so do NOT
+// change the "si_ALU64_sisi_l16_lh" class it inherits from.
+def Hexagon_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def Hexagon_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def Hexagon_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd|:crnd][:sat]
+def Hexagon_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd32=vaddw(Rss32,Rtt32)[:sat]
+def Hexagon_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def Hexagon_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def Hexagon_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def Hexagon_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def Hexagon_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def Hexagon_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def Hexagon_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def Hexagon_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def Hexagon_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def Hexagon_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def Hexagon_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def Hexagon_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def Hexagon_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def Hexagon_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def Hexagon_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def Hexagon_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def Hexagon_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def Hexagon_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
+def Hexagon_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def Hexagon_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def Hexagon_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def Hexagon_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def Hexagon_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def Hexagon_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def Hexagon_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def Hexagon_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def Hexagon_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def Hexagon_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def Hexagon_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def Hexagon_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def Hexagon_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def Hexagon_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def Hexagon_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def Hexagon_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR and xor with destination.
+def Hexagon_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def Hexagon_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def Hexagon_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def Hexagon_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def Hexagon_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def Hexagon_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def Hexagon_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def Hexagon_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def Hexagon_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def Hexagon_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def Hexagon_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def Hexagon_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def Hexagon_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def Hexagon_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def Hexagon_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def Hexagon_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def Hexagon_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def Hexagon_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def Hexagon_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def Hexagon_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def Hexagon_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def Hexagon_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def Hexagon_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def Hexagon_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def Hexagon_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def Hexagon_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def Hexagon_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def Hexagon_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def Hexagon_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def Hexagon_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def Hexagon_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def Hexagon_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def Hexagon_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def Hexagon_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def Hexagon_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def Hexagon_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def Hexagon_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def Hexagon_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def Hexagon_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def Hexagon_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def Hexagon_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def Hexagon_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def Hexagon_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def Hexagon_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def Hexagon_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def Hexagon_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def Hexagon_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def Hexagon_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def Hexagon_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def Hexagon_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def Hexagon_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def Hexagon_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def Hexagon_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def Hexagon_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd][:sat]
+def Hexagon_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
+def Hexagon_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rxx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rxx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def Hexagon_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def Hexagon_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def Hexagon_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def Hexagon_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def Hexagon_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def Hexagon_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def Hexagon_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def Hexagon_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def Hexagon_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def Hexagon_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def Hexagon_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def Hexagon_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def Hexagon_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def Hexagon_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def Hexagon_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def Hexagon_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def Hexagon_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def Hexagon_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def Hexagon_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def Hexagon_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def Hexagon_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def Hexagon_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def Hexagon_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def Hexagon_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def Hexagon_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def Hexagon_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def Hexagon_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def Hexagon_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def Hexagon_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def Hexagon_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def Hexagon_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def Hexagon_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rxx32+=vrmpyh(Rss32,Rtt32)
+def Hexagon_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def Hexagon_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def Hexagon_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def Hexagon_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def Hexagon_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def Hexagon_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def Hexagon_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def Hexagon_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def Hexagon_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def Hexagon_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def Hexagon_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def Hexagon_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def Hexagon_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def Hexagon_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def Hexagon_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def Hexagon_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def Hexagon_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def Hexagon_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
+def HEXAGON_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def HEXAGON_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def HEXAGON_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
+def Hexagon_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def Hexagon_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def Hexagon_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def Hexagon_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def HEXAGON_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift Iteration.
+def HEXAGON_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Need custom lowering
+def Hexagon_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def Hexagon_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def Hexagon_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def Hexagon_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def Hexagon_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def Hexagon_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def Hexagon_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def Hexagon_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def Hexagon_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def Hexagon_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def HEXAGON_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def HEXAGON_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def Hexagon_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def Hexagon_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def Hexagon_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def Hexagon_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def Hexagon_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def Hexagon_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def Hexagon_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def Hexagon_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def Hexagon_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
+def Hexagon_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def Hexagon_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def Hexagon_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def Hexagon_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def Hexagon_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def Hexagon_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def Hexagon_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def Hexagon_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def Hexagon_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def Hexagon_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def Hexagon_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def Hexagon_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def Hexagon_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def Hexagon_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def Hexagon_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def Hexagon_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def Hexagon_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def Hexagon_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def Hexagon_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def Hexagon_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def Hexagon_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
+def Hexagon_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def Hexagon_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def Hexagon_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def Hexagon_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def Hexagon_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def Hexagon_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def Hexagon_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def Hexagon_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def Hexagon_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def Hexagon_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def Hexagon_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def Hexagon_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def Hexagon_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def Hexagon_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def Hexagon_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def Hexagon_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def Hexagon_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def Hexagon_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def Hexagon_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def Hexagon_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def Hexagon_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def Hexagon_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def Hexagon_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def Hexagon_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def Hexagon_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def Hexagon_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def Hexagon_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def Hexagon_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def Hexagon_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def Hexagon_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def Hexagon_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def Hexagon_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def Hexagon_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def Hexagon_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def Hexagon_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def Hexagon_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def Hexagon_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def Hexagon_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def Hexagon_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def Hexagon_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def Hexagon_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def Hexagon_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def Hexagon_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def Hexagon_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def Hexagon_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def Hexagon_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def Hexagon_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def Hexagon_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def Hexagon_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def Hexagon_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def Hexagon_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def Hexagon_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def Hexagon_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def Hexagon_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def Hexagon_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def Hexagon_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def Hexagon_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def Hexagon_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def Hexagon_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def Hexagon_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def Hexagon_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def HEXAGON_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def HEXAGON_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def HEXAGON_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def HEXAGON_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def Hexagon_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def Hexagon_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def Hexagon_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def Hexagon_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def Hexagon_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def Hexagon_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def Hexagon_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def Hexagon_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def Hexagon_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/asr/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def Hexagon_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def Hexagon_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def Hexagon_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def Hexagon_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def Hexagon_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def Hexagon_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def Hexagon_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 000000000000..68eaf68480e0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,29 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply two 64-bit values and use the lower 64 bits of the result.
+//
+// Optimized using the multiply-accumulate intrinsics.
+//
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (COMBINE_rr
+ (Hexagon_M2_maci
+ (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
+ (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_loreg))>;
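+// A sketch of the decomposition used by the pattern above (treating
+// a = a_hi:a_lo and b = b_hi:b_lo as 32-bit halves; the names are only
+// illustrative):
+//
+//   a * b mod 2^64 = (a_lo * b_lo)                        // MPYU64
+//                    + 2^32 * (a_lo * b_hi + a_hi * b_lo) // two M2_maci
+//
+// i.e. the low word is lo32(a_lo * b_lo) and the high word is
+// hi32(a_lo * b_lo) + lo32(a_lo * b_hi) + lo32(a_hi * b_lo), which
+// COMBINE_rr reassembles into the 64-bit result.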
+
+
+
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 000000000000..2a54e62d20ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector reduce add unsigned halfwords.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 000000000000..dd28ebb57231
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
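+// A note on the class naming convention used below (inferred from the
+// definitions themselves, not from the specification): the leading token is
+// the result register class and the trailing tokens are the operand types
+// (si = 32-bit IntRegs, di = 64-bit DoubleRegs, qi = PredRegs, sN/uN = N-bit
+// signed or unsigned immediate); "neg"/"not" mark a result or operand that is
+// complemented in the printed assembly.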
+
+//
+// ALU 32 types.
+//
+
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+//
+// SInst Classes.
+//
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, add($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, sub(#$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / Compare To General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
new file mode 100644
index 000000000000..16ea7cf6ed7f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMCInst.h
@@ -0,0 +1,41 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+ class HexagonMCInst: public MCInst {
+ // Packet start and end markers
+ unsigned startPacket: 1, endPacket: 1;
+ const MachineInstr *MachineI;
+ public:
+ explicit HexagonMCInst(): MCInst(),
+ startPacket(0), endPacket(0) {}
+
+ const MachineInstr* getMI() const { return MachineI; };
+
+ void setMI(const MachineInstr *MI) { MachineI = MI; };
+
+ bool isStartPacket() const { return (startPacket); };
+ bool isEndPacket() const { return (endPacket); };
+
+ void setStartPacket(bool yes) { startPacket = yes; };
+ void setEndPacket(bool yes) { endPacket = yes; };
+ };
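+ // A hypothetical usage sketch (the caller is assumed, not part of this
+ // header): a packetizer or asm printer could annotate packet boundaries as
+ //
+ //   HexagonMCInst MCI;
+ //   MCI.setMI(MI);              // remember the originating MachineInstr
+ //   MCI.setStartPacket(true);   // first instruction of a VLIW packet
+ //   MCI.setEndPacket(false);    // more instructions follow in the packet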
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
new file mode 100644
index 000000000000..70bddcc76a59
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -0,0 +1,93 @@
+//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Hexagon MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
+ HexagonAsmPrinter& Printer) {
+ MCContext &MC = Printer.OutContext;
+ const MCExpr *ME;
+
+ ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC);
+
+ if (!MO.isJTI() && MO.getOffset())
+ ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
+ MC);
+
+ return (MCOperand::CreateExpr(ME));
+}
+
+// Create an MCInst from a MachineInstr
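+// A hypothetical call site (assumed for illustration; the caller is not part
+// of this file): the target AsmPrinter would typically do
+//   MCInst MCI;
+//   HexagonLowerToMC(MI, MCI, *this);
+//   OutStreamer.EmitInstruction(MCI);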
+void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
+ HexagonAsmPrinter& AP) {
+ MCI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCO;
+
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCO = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ // FP immediates are used only when setting GPRs, so they may be dealt
+ // with like regular immediates from this point on.
+ MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCO = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCO = MCOperand::CreateExpr
+ (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
+ AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
+ AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ }
+
+ MCI.addOperand(MCO);
+ }
+}
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 000000000000..0318c519e453
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+
+
+public:
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+
+ HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ HasClobberLR(0) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
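+ // For example, an instruction forming a one-instruction packet ends up with
+ // PacketInfo[MI] == (Hexagon::StartPacket | Hexagon::EndPacket).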
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
new file mode 100644
index 000000000000..55cbc094a2ad
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -0,0 +1,288 @@
+//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This peephole pass optimizes the following cases:
+// 1. Optimizes redundant sign extends for the following case
+// Transform the following pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+// %vreg176<def> = COPY vreg166
+//
+// 2. Optimizes redundant negation of predicates.
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2
+// ...
+// %vreg16<def> = NOT_p %vreg15<kill>
+// ...
+// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead>
+//
+// Into
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2;
+// ...
+// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>;
+//
+// Note: The peephole pass makes instructions like
+// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
+// redundant and relies on some form of dead instruction removal, such as
+// DCE or DIE, to actually eliminate them.
+
+
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Peephole Optimization"));
+
+static cl::opt<int>
+DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden,
+ cl::desc("Maximum number of P=NOT(P) to be optimized"));
+
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of PNotP"));
+
+static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of Sign/Zero Extends"));
+
+namespace {
+ struct HexagonPeephole : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+ const MachineRegisterInfo *MRI;
+
+ public:
+ static char ID;
+ HexagonPeephole() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Hexagon optimize redundant zero and size extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
+ };
+}
+
+char HexagonPeephole::ID = 0;
+
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
+ getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
+ getRegisterInfo());
+ MRI = &MF.getRegInfo();
+
+ DenseMap<unsigned, unsigned> PeepholeMap;
+
+ if (DisableHexagonPeephole) return false;
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ PeepholeMap.clear();
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for P=NOT(P).
+ if (!DisablePNotP &&
+ (MI->getOpcode() == Hexagon::NOT_p)) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = NOT_xx %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (!DisableOptSZExt && MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+ }
+ }
+ }
+
+ // Look for Predicated instructions.
+ if (!DisablePNotP) {
+ bool Done = false;
+ if (QII->isPredicated(MI)) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ unsigned Reg0 = Op0.getReg();
+ const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
+ if (RC0->getID() == Hexagon::PredRegsRegClassID) {
+ // Handle instructions that have a predicate register in op0
+ // (most cases of predicable instructions).
+ if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
+ // Change the 1st operand and flip the opcode.
+ MI->getOperand(0).setReg(PeepholeSrc);
+ int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
+ MI->setDesc(QII->get(NewOp));
+ Done = true;
+ }
+ }
+ }
+ }
+
+ if (!Done) {
+ // Handle special instructions.
+ unsigned Op = MI->getOpcode();
+ unsigned NewOp = 0;
+ unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices.
+
+ switch (Op) {
+ case Hexagon::TFR_condset_rr:
+ case Hexagon::TFR_condset_ii:
+ case Hexagon::MUX_ii:
+ case Hexagon::MUX_rr:
+ NewOp = Op;
+ break;
+ case Hexagon::TFR_condset_ri:
+ NewOp = Hexagon::TFR_condset_ir;
+ break;
+ case Hexagon::TFR_condset_ir:
+ NewOp = Hexagon::TFR_condset_ri;
+ break;
+ case Hexagon::MUX_ri:
+ NewOp = Hexagon::MUX_ir;
+ break;
+ case Hexagon::MUX_ir:
+ NewOp = Hexagon::MUX_ri;
+ break;
+ }
+ if (NewOp) {
+ unsigned PSrc = MI->getOperand(PR).getReg();
+ if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
+ MI->getOperand(PR).setReg(POrig);
+ MI->setDesc(QII->get(NewOp));
+ // Swap operands S1 and S2.
+ MachineOperand Op1 = MI->getOperand(S1);
+ MachineOperand Op2 = MI->getOperand(S2);
+ ChangeOpInto(MI->getOperand(S1), Op2);
+ ChangeOpInto(MI->getOperand(S2), Op1);
+ }
+ } // if (NewOp)
+ } // if (!Done)
+
+ } // if (!DisablePNotP)
+
+ } // Instruction
+ } // Basic Block
+ return true;
+}
+
+void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
+ assert (&Dst != &Src && "Cannot duplicate into itself");
+ switch (Dst.getType()) {
+ case MachineOperand::MO_Register:
+ if (Src.isReg()) {
+ Dst.setReg(Src.getReg());
+ } else if (Src.isImm()) {
+ Dst.ChangeToImmediate(Src.getImm());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ case MachineOperand::MO_Immediate:
+ if (Src.isImm()) {
+ Dst.setImm(Src.getImm());
+ } else if (Src.isReg()) {
+ Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
+ Src.isKill(), Src.isDead(), Src.isUndef(),
+ Src.isDebug());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ default:
+ llvm_unreachable("Unexpected dst operand type");
+ break;
+ }
+}
+
+FunctionPass *llvm::createHexagonPeephole() {
+ return new HexagonPeephole();
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 000000000000..2a9de9232915
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,315 @@
+//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+ const HexagonInstrInfo &tii)
+ : HexagonGenRegisterInfo(Hexagon::R31),
+ Subtarget(st),
+ TII(tii) {
+}
+
+const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+ *MF)
+ const {
+ static const uint16_t CalleeSavedRegsV2[] = {
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+ static const uint16_t CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegsV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegsV3;
+ }
+ llvm_unreachable("Callee saved registers requested for unknown architecture "
+ "version");
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegClassesV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegClassesV3;
+ }
+ llvm_unreachable("Callee saved register classes requested for unknown "
+ "architecture version");
+}
+
+void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+
+  // Hexagon_TODO: Do we need to enforce this for Hexagon?
+  assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ const unsigned FrameSize = MFI.getStackSize();
+
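+  // Offset is the object's frame-relative offset (negative off the frame
+  // pointer); FrameSize + Offset gives the equivalent stack-pointer-relative
+  // offset used for the SP-based addressing below.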
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ) {
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ *getSubRegisters(MI.getOperand(0).getReg()) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
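+        // For memop instructions, prefer a stack-pointer-relative form when
+        // the combined offset fits; otherwise materialize the address into
+        // the reserved register (via CONST32 + add when the offset does not
+        // fit an add-immediate) and rewrite the operand to use it.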
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
+ true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ // Can we delete MI??? r2 = add (r2, #0).
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ return Hexagon::R30;
+ }
+
+ return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+ &Moves) const
+{
+ // VirtualFP = (R30 + #0).
+ unsigned FPReg = getFrameRegister();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 000000000000..6cf727bc027d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,90 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+
+//
+// We try not to hard-code the reserved registers in our code, so the
+// following two macros are defined. However, there are still a few places
+// where R10 and R11 are hard-wired. If, in the future, we decide to change
+// the reserved registers, don't forget to update the following places:
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 000000000000..d44eae3602c9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,167 @@
+//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Hexagon" in {
+
+ class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ }
+
+ class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ }
+
+ // Registers are identified with 5-bit ID numbers.
+ // Ri - 32-bit integer registers.
+ class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rf - 32-bit floating-point registers.
+ class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+
+ // Rd - 64-bit registers.
+ class Rd<bits<5> num, string n, list<Register> subregs> :
+ HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+ // Rp - predicate registers
+ class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rc - control registers
+ class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rj - aliased integer registers
+ class Rj<string n, Ri R>: HexagonReg<n> {
+ let Num = R.Num;
+ let Aliases = [R];
+ }
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+ def SP : Rj<"sp", R29>, DwarfRegNum<[29]>;
+ def FP : Rj<"fp", R30>, DwarfRegNum<[30]>;
+ def LR : Rj<"lr", R31>, DwarfRegNum<[31]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+ def P0 : Rp<0, "p0">, DwarfRegNum<[63]>;
+ def P1 : Rp<1, "p1">, DwarfRegNum<[64]>;
+ def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
+ def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
+
+ // Control registers.
+ def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+ def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+
+ def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct?
+ def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct?
+}
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)> {
+}
+
+
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64], 64,
+ (add (sequence "D%u", 0, 4),
+ (sequence "D%u", 6, 13), D5, D14, D15)> {
+ let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
+}
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1), PC, GP)> {
+ let Size = 32;
+}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 000000000000..66a00e12dd09
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,82 @@
+//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends //
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign-extended by the caller according to the Hexagon ABI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+namespace {
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+RegisterPass<HexagonRemoveExtendArgs> X("reargs",
+ "Remove Sign and Zero Extends for Args"
+ );
+
+
+
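+// For every non-pointer argument carrying the 'signext' attribute, replace
+// each sext of that argument with a fresh sext of the same argument hoisted
+// to the top of the entry block, transferring all uses to the hoisted copy.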
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI, ++Idx) {
+ if (F.paramHasAttr(Idx, Attribute::SExt)) {
+ Argument* Arg = AI;
+ if (!isa<PointerType>(Arg->getType())) {
+ for (Instruction::use_iterator UI = Arg->use_begin();
+ UI != Arg->use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = F.getEntryBlock().begin();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+ return new HexagonRemoveExtendArgs();
+}
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 000000000000..c4887963895c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,54 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LUNIT : FuncUnit;
+def LSUNIT : FuncUnit;
+def MUNIT : FuncUnit;
+def SUNIT : FuncUnit;
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def S : InstrItinClass;
+def SYS : InstrItinClass;
+def MARKER : InstrItinClass;
+def PSEUDO : InstrItinClass;
+
+def HexagonItineraries :
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 000000000000..1d82dbb90e91
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,59 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V4 machine.
+// This file describes the scheduling information for that machine.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// ALU64/M/S instruction classes of V2 are collectively known as XTYPE in V4.
+def PREFIX : InstrItinClass;
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 000000000000..d8feb89c0ab5
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//===-- HexagonSelectCCInfo.td - Selectcc mappings ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p0 xor p1)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 instruction, so use this:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a52c604505b8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+ &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ flag_aligned_memcpy = false;
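+  // Mark this memcpy as aligned when the operands are at least 4-byte
+  // aligned and the size is a compile-time constant larger than 32 bytes and
+  // a multiple of 8; the flag is consulted elsewhere in the backend.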
+ if ((Align & 0x3) == 0) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+ flag_aligned_memcpy = true;
+ }
+ }
+
+ return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 000000000000..0673e4d35472
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 000000000000..d10c9f2d5242
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,129 @@
+//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We assume optimistically that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+ const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::TFR_condset_rr) {
+
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+        // Minor optimization: do not emit the predicated copy if the source
+        // and the destination are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::TFR_condset_ii) {
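+        // Expand an immediate/immediate conditional transfer into a pair of
+        // predicated immediate transfers: one taken when the predicate is
+        // true (Immed1) and one when it is false (Immed2).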
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+ return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 000000000000..654d33626edf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,62 @@
+//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+ HexagonGenSubtargetInfo(TT, CPU, FS),
+ HexagonArchVersion(V1),
+ CPUString(CPU.str()) {
+ ParseSubtargetFeatures(CPU, FS);
+
+ switch(HexagonArchVersion) {
+ case HexagonSubtarget::V2:
+ break;
+ case HexagonSubtarget::V3:
+ EnableV3 = true;
+ break;
+ case HexagonSubtarget::V4:
+ break;
+ default:
+ llvm_unreachable("Unknown Architecture Version.");
+ }
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // Max issue per cycle == bundle width.
+ InstrItins.IssueWidth = 4;
+
+  UseMemOps = EnableMemOps;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 000000000000..3079086986d9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ bool UseMemOps;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+ bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+ bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+ bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+ bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+ bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 000000000000..411325bf963e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,145 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Hexagon target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops(
+    "disable-hexagon-hwloops", cl::Hidden,
+    cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") ,
+ Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ return true;
+}
+
+namespace {
+/// Hexagon Code Generator Pass Configuration Options.
+class HexagonPassConfig : public TargetPassConfig {
+public:
+ HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ HexagonTargetMachine &getHexagonTargetMachine() const {
+ return getTM<HexagonTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new HexagonPassConfig(this, PM);
+}
+
+bool HexagonPassConfig::addInstSelector() {
+ PM.add(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+ PM.add(createHexagonISelDag(getHexagonTargetMachine()));
+ PM.add(createHexagonPeephole());
+ return false;
+}
+
+
+bool HexagonPassConfig::addPreRegAlloc() {
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonHardwareLoops());
+ }
+
+ return false;
+}
+
+bool HexagonPassConfig::addPostRegAlloc() {
+ PM.add(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ return true;
+}
+
+
+bool HexagonPassConfig::addPreSched2() {
+ addPass(IfConverterID);
+ return true;
+}
+
+bool HexagonPassConfig::addPreEmitPass() {
+
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonFixupHwLoops());
+ }
+
+ // Expand Spill code for predicate registers.
+ PM.add(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+
+ // Split up TFRcondsets into conditional transfers.
+ PM.add(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+
+ // Create Packets.
+ PM.add(createHexagonPacketizer());
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 000000000000..0336965d11f1
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,83 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 000000000000..32cc70958638
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the HexagonTargetObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+  // Otherwise, check if GV should be in sdata/sbss, when normally it would end
+ // up in getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 000000000000..693345081ee3
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon Object File Info ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
new file mode 100644
index 000000000000..c6e7bd1f53d6
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -0,0 +1,3642 @@
+//===----- HexagonVLIWPacketizer.cpp - VLIW packetizer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple VLIW packetizer using DFA. The packetizer works on
+// machine basic blocks. For each instruction I in BB, the packetizer consults
+// the DFA to see if machine resources are available to execute I. If so, the
+// packetizer checks if I depends on any instruction J in the current packet.
+// If no dependency is found, I is added to the current packet and the machine
+// resources are marked as taken. If any dependency is found, a target API call
+// is made to prune the dependence.
+//
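+// In outline, the driver loop (a simplified sketch of the generic
+// VLIWPacketizerList machinery this pass builds on, not the exact control
+// flow) looks like:
+//
+//   for each scheduling region in the basic block:
+//     for each instruction MI in the region:
+//       if the DFA can reserve resources for MI and
+//          isLegalToPacketizeTogether() holds against the current packet:
+//         add MI to the current packet
+//       else:
+//         close the current packet and start a new one containing MI
+//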
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "packets"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+using namespace llvm;
+
+namespace {
+ class HexagonPacketizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonPacketizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char HexagonPacketizer::ID = 0;
+
+ class HexagonPacketizerList : public VLIWPacketizerList {
+
+ private:
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState();
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB);
+
+    // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI);
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ);
+
+    // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ);
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI);
+ private:
+ bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
+ bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool DemoteToDotOld(MachineInstr* MI);
+ bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr*,
+ unsigned, std::map <MachineInstr*, SUnit*>);
+ bool isNewifiable(MachineInstr* MI);
+ bool isCondInst(MachineInstr* MI);
+ bool IsNewifyStore (MachineInstr* MI);
+ bool tryAllocateResourcesForConstExt(MachineInstr* MI);
+ bool canReserveResourcesForConstExt(MachineInstr *MI);
+ void reserveResourcesForConstExt(MachineInstr* MI);
+ bool isNewValueInst(MachineInstr* MI);
+ bool isDotNewInst(MachineInstr* MI);
+ };
+}
+
+// HexagonPacketizerList Ctor.
+HexagonPacketizerList::HexagonPacketizerList(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT)
+  : VLIWPacketizerList(MF, MLI, MDT, true) {
+}
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+ // Instantiate the packetizer.
+ HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+  // Loop over all basic blocks and remove KILL pseudo-instructions.
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+  // Here, Insn 1 will result in the dependence graph not emitting an output
+  // dependence between Insn 0 and Insn 2. This can lead to incorrect
+  // packetization.
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill()) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+ break;
+ }
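+      // The region start is then reset to the beginning of the block, so each
+      // region effectively runs from the start of the block up to RegionEnd.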
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = llvm::prior(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == llvm::prior(RegionEnd)) {
+ RegionEnd = llvm::prior(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+}
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+ return ((MI->getOpcode() == Hexagon::CALLR) ||
+ (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
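+  // PseudoMI is a throwaway IMMEXT instruction used only to let the DFA
+  // account for the resources a constant extender consumes; it is deleted
+  // again on both paths below.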
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ llvm_unreachable("can not reserve resources for constant extender.");
+ }
+ return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ assert(QII->isExtended(MI) &&
+ "Should only be called for constant extended instructions");
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MI->getDebugLoc());
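+  // Probe the DFA with a dummy IMMEXT; nothing is actually reserved here.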
+ bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+ MF->DeleteMachineInstr(PseudoMI);
+ return CanReserve;
+}
+
+// Allocate resources (i.e., 4 bytes) for a constant extender. Return true if
+// the allocation succeeds, and false otherwise.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return true;
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return false;
+ }
+}
+
+
+bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
+ SDep::Kind DepType,
+ unsigned DepReg) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  const HexagonRegisterInfo* QRI =
+    (const HexagonRegisterInfo *) TM.getRegisterInfo();
+
+ // Check for lr dependence
+ if (DepReg == QRI->getRARegister()) {
+ return true;
+ }
+
+ if (QII->isDeallocRet(MI)) {
+ if (DepReg == QRI->getFrameRegister() ||
+ DepReg == QRI->getStackRegister())
+ return true;
+ }
+
+ // Check if this is a predicate dependence
+ const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ return true;
+ }
+
+ //
+ // Lastly check for an operand used in an indirect call
+ // If we had an attribute for checking if an instruction is an indirect call,
+ // then we could have avoided this relatively brittle implementation of
+ // IsIndirectCall()
+ //
+ // Assumes that the first operand of the CALLr is the function address
+ //
+ if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ MachineOperand MO = MI->getOperand(0);
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool IsRegDependence(const SDep::Kind DepType) {
+ return (DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output);
+}
+
+static bool IsDirectJump(MachineInstr* MI) {
+ return (MI->getOpcode() == Hexagon::JMP);
+}
+
+static bool IsSchedBarrier(MachineInstr* MI) {
+ switch (MI->getOpcode()) {
+ case Hexagon::BARRIER:
+ return true;
+ }
+ return false;
+}
+
+static bool IsControlFlow(MachineInstr* MI) {
+ return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+}
+
+bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (QII->isNewValueJump(MI))
+ return true;
+
+ if (QII->isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
+// Returns true if an instruction can be promoted to a new-value store; it
+// always returns false for V2 and V3. The switch below lists all the
+// conditional and unconditional stores that can be promoted to new-value
+// stores.
+
+bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
+  const HexagonRegisterInfo* QRI =
+    (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ // store byte
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_indexed_shl_V4:
+ case Hexagon::STrib_shl_V4:
+ case Hexagon::STrib_GP_V4:
+ case Hexagon::STb_GP_V4:
+ case Hexagon::POST_STbri:
+ case Hexagon::STrib_cPt:
+ case Hexagon::STrib_cdnPt_V4:
+ case Hexagon::STrib_cNotPt:
+ case Hexagon::STrib_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_cPt:
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ case Hexagon::STrib_indexed_cNotPt:
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ case Hexagon::STb_GP_cPt_V4:
+ case Hexagon::STb_GP_cNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cPt_V4:
+ case Hexagon::STrib_GP_cNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+
+ // store halfword
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_indexed_shl_V4:
+ case Hexagon::STrih_shl_V4:
+ case Hexagon::STrih_GP_V4:
+ case Hexagon::STh_GP_V4:
+ case Hexagon::POST_SThri:
+ case Hexagon::STrih_cPt:
+ case Hexagon::STrih_cdnPt_V4:
+ case Hexagon::STrih_cNotPt:
+ case Hexagon::STrih_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_cPt:
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ case Hexagon::STrih_indexed_cNotPt:
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ case Hexagon::STh_GP_cPt_V4:
+ case Hexagon::STh_GP_cNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cPt_V4:
+ case Hexagon::STrih_GP_cNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+
+ // store word
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_indexed_shl_V4:
+ case Hexagon::STriw_shl_V4:
+ case Hexagon::STriw_GP_V4:
+ case Hexagon::STw_GP_V4:
+ case Hexagon::POST_STwri:
+ case Hexagon::STriw_cPt:
+ case Hexagon::STriw_cdnPt_V4:
+ case Hexagon::STriw_cNotPt:
+ case Hexagon::STriw_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_cPt:
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ case Hexagon::STriw_indexed_cNotPt:
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ case Hexagon::STw_GP_cPt_V4:
+ case Hexagon::STw_GP_cNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cPt_V4:
+ case Hexagon::STriw_GP_cNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return QRI->Subtarget.hasV4TOps();
+ }
+ return false;
+}
+
+static bool IsLoopN(MachineInstr *MI) {
+ return (MI->getOpcode() == Hexagon::LOOP0_i ||
+ MI->getOpcode() == Hexagon::LOOP0_r);
+}
+
+/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
+/// callee-saved register.
+static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ unsigned CalleeSavedReg = *CSR;
+ if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ return true;
+ }
+ return false;
+}
+
+// Return the new-value store opcode for a given store opcode.
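+//
+// For example (illustrative), the plain word store
+//   memw(R0+#0) = R1             (STriw)
+// is rewritten to the new-value form
+//   memw(R0+#0) = R1.new         (STriw_nv_V4)
+// where R1 is produced by another instruction in the same packet.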
+static int GetDotNewOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+
+ // store new value byte
+ case Hexagon::STrib:
+ return Hexagon::STrib_nv_V4;
+
+ case Hexagon::STrib_indexed:
+ return Hexagon::STrib_indexed_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_V4:
+ return Hexagon::STrib_indexed_shl_nv_V4;
+
+ case Hexagon::STrib_shl_V4:
+ return Hexagon::STrib_shl_nv_V4;
+
+ case Hexagon::STrib_GP_V4:
+ return Hexagon::STrib_GP_nv_V4;
+
+ case Hexagon::STb_GP_V4:
+ return Hexagon::STb_GP_nv_V4;
+
+ case Hexagon::POST_STbri:
+ return Hexagon::POST_STbri_nv_V4;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cPt_nv_V4;
+
+ case Hexagon::STrib_cdnPt_V4:
+ return Hexagon::STrib_cdnPt_nv_V4;
+
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cNotPt_nv_V4;
+
+ case Hexagon::STrib_cdnNotPt_V4:
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnPt_V4:
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnPt_V4:
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_V4:
+ return Hexagon::STrib_GP_cPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4:
+ return Hexagon::STrib_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnPt_V4:
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // store new value halfword
+ case Hexagon::STrih:
+ return Hexagon::STrih_nv_V4;
+
+ case Hexagon::STrih_indexed:
+ return Hexagon::STrih_indexed_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_V4:
+ return Hexagon::STrih_indexed_shl_nv_V4;
+
+ case Hexagon::STrih_shl_V4:
+ return Hexagon::STrih_shl_nv_V4;
+
+ case Hexagon::STrih_GP_V4:
+ return Hexagon::STrih_GP_nv_V4;
+
+ case Hexagon::STh_GP_V4:
+ return Hexagon::STh_GP_nv_V4;
+
+ case Hexagon::POST_SThri:
+ return Hexagon::POST_SThri_nv_V4;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cPt_nv_V4;
+
+ case Hexagon::STrih_cdnPt_V4:
+ return Hexagon::STrih_cdnPt_nv_V4;
+
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cNotPt_nv_V4;
+
+ case Hexagon::STrih_cdnNotPt_V4:
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnPt_V4:
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnPt_V4:
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_V4:
+ return Hexagon::STrih_GP_cPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4:
+ return Hexagon::STrih_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnPt_V4:
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // store new value word
+ case Hexagon::STriw:
+ return Hexagon::STriw_nv_V4;
+
+ case Hexagon::STriw_indexed:
+ return Hexagon::STriw_indexed_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_V4:
+ return Hexagon::STriw_indexed_shl_nv_V4;
+
+ case Hexagon::STriw_shl_V4:
+ return Hexagon::STriw_shl_nv_V4;
+
+ case Hexagon::STriw_GP_V4:
+ return Hexagon::STriw_GP_nv_V4;
+
+ case Hexagon::STw_GP_V4:
+ return Hexagon::STw_GP_nv_V4;
+
+ case Hexagon::POST_STwri:
+ return Hexagon::POST_STwri_nv_V4;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cPt_nv_V4;
+
+ case Hexagon::STriw_cdnPt_V4:
+ return Hexagon::STriw_cdnPt_nv_V4;
+
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cNotPt_nv_V4;
+
+ case Hexagon::STriw_cdnNotPt_V4:
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnPt_V4:
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnPt_V4:
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_V4:
+ return Hexagon::STriw_GP_cPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4:
+ return Hexagon::STriw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnPt_V4:
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+ }
+}
+
+// Return the .new predicated version of an instruction's opcode.
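+//
+// For example (illustrative), the conditional transfer
+//   if (p0) R0 = R1              (TFR_cPt)
+// becomes
+//   if (p0.new) R0 = R1          (TFR_cdnPt)
+// when the instruction defining p0 is placed in the same packet.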
+static int GetDotNewPredOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+
+ // Conditional stores
+ // Store byte conditionally
+ case Hexagon::STrib_cPt :
+ return Hexagon::STrib_cdnPt_V4;
+
+ case Hexagon::STrib_cNotPt :
+ return Hexagon::STrib_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_cPt :
+ return Hexagon::STrib_indexed_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_cNotPt :
+ return Hexagon::STrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrib_imm_cPt_V4 :
+ return Hexagon::STrib_imm_cdnPt_V4;
+
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ return Hexagon::STrib_imm_cdnNotPt_V4;
+
+ case Hexagon::POST_STbri_cPt :
+ return Hexagon::POST_STbri_cdnPt_V4;
+
+ case Hexagon::POST_STbri_cNotPt :
+ return Hexagon::POST_STbri_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4 :
+ return Hexagon::STb_GP_cdnPt_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4 :
+ return Hexagon::STb_GP_cdnNotPt_V4;
+
+ case Hexagon::STrib_GP_cPt_V4 :
+ return Hexagon::STrib_GP_cdnPt_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_V4;
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_cPt :
+ return Hexagon::STrid_cdnPt_V4;
+
+ case Hexagon::STrid_cNotPt :
+ return Hexagon::STrid_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_cPt :
+ return Hexagon::STrid_indexed_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_cNotPt :
+ return Hexagon::STrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STdri_cPt :
+ return Hexagon::POST_STdri_cdnPt_V4;
+
+ case Hexagon::POST_STdri_cNotPt :
+ return Hexagon::POST_STdri_cdnNotPt_V4;
+
+ case Hexagon::STd_GP_cPt_V4 :
+ return Hexagon::STd_GP_cdnPt_V4;
+
+ case Hexagon::STd_GP_cNotPt_V4 :
+ return Hexagon::STd_GP_cdnNotPt_V4;
+
+ case Hexagon::STrid_GP_cPt_V4 :
+ return Hexagon::STrid_GP_cdnPt_V4;
+
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ return Hexagon::STrid_GP_cdnNotPt_V4;
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cPt :
+ return Hexagon::STrih_cdnPt_V4;
+
+ case Hexagon::STrih_cNotPt :
+ return Hexagon::STrih_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_cPt :
+ return Hexagon::STrih_indexed_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_cNotPt :
+ return Hexagon::STrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrih_imm_cPt_V4 :
+ return Hexagon::STrih_imm_cdnPt_V4;
+
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ return Hexagon::STrih_imm_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_SThri_cPt :
+ return Hexagon::POST_SThri_cdnPt_V4;
+
+ case Hexagon::POST_SThri_cNotPt :
+ return Hexagon::POST_SThri_cdnNotPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4 :
+ return Hexagon::STh_GP_cdnPt_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4 :
+ return Hexagon::STh_GP_cdnNotPt_V4;
+
+ case Hexagon::STrih_GP_cPt_V4 :
+ return Hexagon::STrih_GP_cdnPt_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_V4;
+
+ // Store word conditionally
+ case Hexagon::STriw_cPt :
+ return Hexagon::STriw_cdnPt_V4;
+
+ case Hexagon::STriw_cNotPt :
+ return Hexagon::STriw_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_cPt :
+ return Hexagon::STriw_indexed_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_cNotPt :
+ return Hexagon::STriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4 :
+ return Hexagon::STriw_imm_cdnPt_V4;
+
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ return Hexagon::STriw_imm_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STwri_cPt :
+ return Hexagon::POST_STwri_cdnPt_V4;
+
+ case Hexagon::POST_STwri_cNotPt :
+ return Hexagon::POST_STwri_cdnNotPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4 :
+ return Hexagon::STw_GP_cdnPt_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return Hexagon::STw_GP_cdnNotPt_V4;
+
+ case Hexagon::STriw_GP_cPt_V4 :
+ return Hexagon::STriw_GP_cdnPt_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_V4;
+
+    // Conditional Jumps
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cdnPt;
+
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_cdnNotPt;
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cdnPt_V3;
+
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cdnNotPt_V3;
+
+ // Conditional Transfers
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cdnPt;
+
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cdnNotPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cdnPt;
+
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cdnNotPt;
+
+ // Load double word
+ case Hexagon::LDrid_cPt :
+ return Hexagon::LDrid_cdnPt;
+
+ case Hexagon::LDrid_cNotPt :
+ return Hexagon::LDrid_cdnNotPt;
+
+ case Hexagon::LDrid_indexed_cPt :
+ return Hexagon::LDrid_indexed_cdnPt;
+
+ case Hexagon::LDrid_indexed_cNotPt :
+ return Hexagon::LDrid_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrid_cPt :
+ return Hexagon::POST_LDrid_cdnPt_V4;
+
+ case Hexagon::POST_LDrid_cNotPt :
+ return Hexagon::POST_LDrid_cdnNotPt_V4;
+
+ // Load word
+ case Hexagon::LDriw_cPt :
+ return Hexagon::LDriw_cdnPt;
+
+ case Hexagon::LDriw_cNotPt :
+ return Hexagon::LDriw_cdnNotPt;
+
+ case Hexagon::LDriw_indexed_cPt :
+ return Hexagon::LDriw_indexed_cdnPt;
+
+ case Hexagon::LDriw_indexed_cNotPt :
+ return Hexagon::LDriw_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriw_cPt :
+ return Hexagon::POST_LDriw_cdnPt_V4;
+
+ case Hexagon::POST_LDriw_cNotPt :
+ return Hexagon::POST_LDriw_cdnNotPt_V4;
+
+ // Load halfword
+ case Hexagon::LDrih_cPt :
+ return Hexagon::LDrih_cdnPt;
+
+ case Hexagon::LDrih_cNotPt :
+ return Hexagon::LDrih_cdnNotPt;
+
+ case Hexagon::LDrih_indexed_cPt :
+ return Hexagon::LDrih_indexed_cdnPt;
+
+ case Hexagon::LDrih_indexed_cNotPt :
+ return Hexagon::LDrih_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrih_cPt :
+ return Hexagon::POST_LDrih_cdnPt_V4;
+
+ case Hexagon::POST_LDrih_cNotPt :
+ return Hexagon::POST_LDrih_cdnNotPt_V4;
+
+ // Load byte
+ case Hexagon::LDrib_cPt :
+ return Hexagon::LDrib_cdnPt;
+
+ case Hexagon::LDrib_cNotPt :
+ return Hexagon::LDrib_cdnNotPt;
+
+ case Hexagon::LDrib_indexed_cPt :
+ return Hexagon::LDrib_indexed_cdnPt;
+
+ case Hexagon::LDrib_indexed_cNotPt :
+ return Hexagon::LDrib_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrib_cPt :
+ return Hexagon::POST_LDrib_cdnPt_V4;
+
+ case Hexagon::POST_LDrib_cNotPt :
+ return Hexagon::POST_LDrib_cdnNotPt_V4;
+
+ // Load unsigned halfword
+ case Hexagon::LDriuh_cPt :
+ return Hexagon::LDriuh_cdnPt;
+
+ case Hexagon::LDriuh_cNotPt :
+ return Hexagon::LDriuh_cdnNotPt;
+
+ case Hexagon::LDriuh_indexed_cPt :
+ return Hexagon::LDriuh_indexed_cdnPt;
+
+ case Hexagon::LDriuh_indexed_cNotPt :
+ return Hexagon::LDriuh_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriuh_cPt :
+ return Hexagon::POST_LDriuh_cdnPt_V4;
+
+ case Hexagon::POST_LDriuh_cNotPt :
+ return Hexagon::POST_LDriuh_cdnNotPt_V4;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cPt :
+ return Hexagon::LDriub_cdnPt;
+
+ case Hexagon::LDriub_cNotPt :
+ return Hexagon::LDriub_cdnNotPt;
+
+ case Hexagon::LDriub_indexed_cPt :
+ return Hexagon::LDriub_indexed_cdnPt;
+
+ case Hexagon::LDriub_indexed_cNotPt :
+ return Hexagon::LDriub_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriub_cPt :
+ return Hexagon::POST_LDriub_cdnPt_V4;
+
+ case Hexagon::POST_LDriub_cNotPt :
+ return Hexagon::POST_LDriub_cdnNotPt_V4;
+
+ // V4 indexed+scaled load
+
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ return Hexagon::LDrid_indexed_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ return Hexagon::LDrib_indexed_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ return Hexagon::LDriub_indexed_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ return Hexagon::LDrih_indexed_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ return Hexagon::LDriw_indexed_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cPt_V4:
+ return Hexagon::LDd_GP_cdnPt_V4;
+
+ case Hexagon::LDd_GP_cNotPt_V4:
+ return Hexagon::LDd_GP_cdnNotPt_V4;
+
+ case Hexagon::LDb_GP_cPt_V4:
+ return Hexagon::LDb_GP_cdnPt_V4;
+
+ case Hexagon::LDb_GP_cNotPt_V4:
+ return Hexagon::LDb_GP_cdnNotPt_V4;
+
+ case Hexagon::LDub_GP_cPt_V4:
+ return Hexagon::LDub_GP_cdnPt_V4;
+
+ case Hexagon::LDub_GP_cNotPt_V4:
+ return Hexagon::LDub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDh_GP_cPt_V4:
+ return Hexagon::LDh_GP_cdnPt_V4;
+
+ case Hexagon::LDh_GP_cNotPt_V4:
+ return Hexagon::LDh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDuh_GP_cPt_V4:
+ return Hexagon::LDuh_GP_cdnPt_V4;
+
+ case Hexagon::LDuh_GP_cNotPt_V4:
+ return Hexagon::LDuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDw_GP_cPt_V4:
+ return Hexagon::LDw_GP_cdnPt_V4;
+
+ case Hexagon::LDw_GP_cNotPt_V4:
+ return Hexagon::LDw_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrid_GP_cPt_V4:
+ return Hexagon::LDrid_GP_cdnPt_V4;
+
+ case Hexagon::LDrid_GP_cNotPt_V4:
+ return Hexagon::LDrid_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrib_GP_cPt_V4:
+ return Hexagon::LDrib_GP_cdnPt_V4;
+
+ case Hexagon::LDrib_GP_cNotPt_V4:
+ return Hexagon::LDrib_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriub_GP_cPt_V4:
+ return Hexagon::LDriub_GP_cdnPt_V4;
+
+ case Hexagon::LDriub_GP_cNotPt_V4:
+ return Hexagon::LDriub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrih_GP_cPt_V4:
+ return Hexagon::LDrih_GP_cdnPt_V4;
+
+ case Hexagon::LDrih_GP_cNotPt_V4:
+ return Hexagon::LDrih_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cPt_V4:
+ return Hexagon::LDriuh_GP_cdnPt_V4;
+
+ case Hexagon::LDriuh_GP_cNotPt_V4:
+ return Hexagon::LDriuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriw_GP_cPt_V4:
+ return Hexagon::LDriw_GP_cdnPt_V4;
+
+ case Hexagon::LDriw_GP_cNotPt_V4:
+ return Hexagon::LDriw_GP_cdnNotPt_V4;
+
+ // Conditional store new-value byte
+ case Hexagon::STrib_cPt_nv_V4 :
+ return Hexagon::STrib_cdnPt_nv_V4;
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+ case Hexagon::POST_STbri_cNotPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value halfword
+ case Hexagon::STrih_cPt_nv_V4 :
+ return Hexagon::STrih_cdnPt_nv_V4;
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value word
+ case Hexagon::STriw_cPt_nv_V4 :
+ return Hexagon::STriw_cdnPt_nv_V4;
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+
+ // Conditional add
+ case Hexagon::ADD_ri_cPt :
+ return Hexagon::ADD_ri_cdnPt;
+ case Hexagon::ADD_ri_cNotPt :
+ return Hexagon::ADD_ri_cdnNotPt;
+
+ case Hexagon::ADD_rr_cPt :
+ return Hexagon::ADD_rr_cdnPt;
+ case Hexagon::ADD_rr_cNotPt :
+ return Hexagon::ADD_rr_cdnNotPt;
+
+ // Conditional logical Operations
+ case Hexagon::XOR_rr_cPt :
+ return Hexagon::XOR_rr_cdnPt;
+ case Hexagon::XOR_rr_cNotPt :
+ return Hexagon::XOR_rr_cdnNotPt;
+
+ case Hexagon::AND_rr_cPt :
+ return Hexagon::AND_rr_cdnPt;
+ case Hexagon::AND_rr_cNotPt :
+ return Hexagon::AND_rr_cdnNotPt;
+
+ case Hexagon::OR_rr_cPt :
+ return Hexagon::OR_rr_cdnPt;
+ case Hexagon::OR_rr_cNotPt :
+ return Hexagon::OR_rr_cdnNotPt;
+
+ // Conditional Subtract
+ case Hexagon::SUB_rr_cPt :
+ return Hexagon::SUB_rr_cdnPt;
+ case Hexagon::SUB_rr_cNotPt :
+ return Hexagon::SUB_rr_cdnNotPt;
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cPt :
+ return Hexagon::COMBINE_rr_cdnPt;
+ case Hexagon::COMBINE_rr_cNotPt :
+ return Hexagon::COMBINE_rr_cdnNotPt;
+
+ case Hexagon::ASLH_cPt_V4 :
+ return Hexagon::ASLH_cdnPt_V4;
+ case Hexagon::ASLH_cNotPt_V4 :
+ return Hexagon::ASLH_cdnNotPt_V4;
+
+ case Hexagon::ASRH_cPt_V4 :
+ return Hexagon::ASRH_cdnPt_V4;
+ case Hexagon::ASRH_cNotPt_V4 :
+ return Hexagon::ASRH_cdnNotPt_V4;
+
+ case Hexagon::SXTB_cPt_V4 :
+ return Hexagon::SXTB_cdnPt_V4;
+ case Hexagon::SXTB_cNotPt_V4 :
+ return Hexagon::SXTB_cdnNotPt_V4;
+
+ case Hexagon::SXTH_cPt_V4 :
+ return Hexagon::SXTH_cdnPt_V4;
+ case Hexagon::SXTH_cNotPt_V4 :
+ return Hexagon::SXTH_cdnNotPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4 :
+ return Hexagon::ZXTB_cdnPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4 :
+ return Hexagon::ZXTB_cdnNotPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4 :
+ return Hexagon::ZXTH_cdnPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4 :
+ return Hexagon::ZXTH_cdnNotPt_V4;
+ }
+}
+
+// Returns true if an instruction can be promoted to a .new predicated form
+// or to a new-value store.
+bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
+  return isCondInst(MI) || IsNewifyStore(MI);
+}
+
+bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const MCInstrDesc& TID = MI->getDesc();
+ // bug 5670: until that is fixed,
+ // this portion is disabled.
+ if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+ || QII->isConditionalTransfer(MI)
+ || QII->isConditionalALU32(MI)
+ || QII->isConditionalLoad(MI)
+ || QII->isConditionalStore(MI)) {
+ return true;
+ }
+ return false;
+}
+
+
+// Promote an instruction to its .new form. By this point, CanPromoteToDotNew
+// has already been called and has confirmed that the promotion is legal.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+
+ assert (DepType == SDep::Data);
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ int NewOpcode;
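+  // A dependence on a predicate register selects the .new predicated form;
+  // otherwise the data dependence feeds a new-value store.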
+ if (RC == Hexagon::PredRegsRegisterClass)
+ NewOpcode = GetDotNewPredOp(MI->getOpcode());
+ else
+ NewOpcode = GetDotNewOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+
+ return true;
+}
+
+// Returns the most basic instruction for the .new predicated instructions and
+// new-value stores.
+// For example, all of the following instructions will be converted back to the
+// same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new --->
+// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1 --->
+//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, if the dependence check fails (due to R5) during the
+// next iteration, the instruction should be converted back to its most basic
+// form.
+
+static int GetDotOldOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .old type");
+
+ case Hexagon::TFR_cdnPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFR_cdnNotPt:
+ return Hexagon::TFR_cNotPt;
+
+ case Hexagon::TFRI_cdnPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::TFRI_cdnNotPt:
+ return Hexagon::TFRI_cNotPt;
+
+ case Hexagon::JMP_cdnPt:
+ return Hexagon::JMP_c;
+
+ case Hexagon::JMP_cdnNotPt:
+ return Hexagon::JMP_cNot;
+
+ case Hexagon::JMPR_cdnPt_V3:
+ return Hexagon::JMPR_cPt;
+
+ case Hexagon::JMPR_cdnNotPt_V3:
+ return Hexagon::JMPR_cNotPt;
+
+ // Load double word
+
+ case Hexagon::LDrid_cdnPt :
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDrid_cdnNotPt :
+ return Hexagon::LDrid_cNotPt;
+
+ case Hexagon::LDrid_indexed_cdnPt :
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ return Hexagon::LDrid_indexed_cNotPt;
+
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ return Hexagon::POST_LDrid_cNotPt;
+
+ // Load word
+
+ case Hexagon::LDriw_cdnPt :
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDriw_cdnNotPt :
+ return Hexagon::LDriw_cNotPt;
+
+ case Hexagon::LDriw_indexed_cdnPt :
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ return Hexagon::LDriw_indexed_cNotPt;
+
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ return Hexagon::POST_LDriw_cNotPt;
+
+ // Load half
+
+ case Hexagon::LDrih_cdnPt :
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDrih_cdnNotPt :
+ return Hexagon::LDrih_cNotPt;
+
+ case Hexagon::LDrih_indexed_cdnPt :
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ return Hexagon::LDrih_indexed_cNotPt;
+
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ return Hexagon::POST_LDrih_cNotPt;
+
+ // Load byte
+
+ case Hexagon::LDrib_cdnPt :
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDrib_cdnNotPt :
+ return Hexagon::LDrib_cNotPt;
+
+ case Hexagon::LDrib_indexed_cdnPt :
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ return Hexagon::LDrib_indexed_cNotPt;
+
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ return Hexagon::POST_LDrib_cNotPt;
+
+ // Load unsigned half
+
+ case Hexagon::LDriuh_cdnPt :
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDriuh_cdnNotPt :
+ return Hexagon::LDriuh_cNotPt;
+
+ case Hexagon::LDriuh_indexed_cdnPt :
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ return Hexagon::LDriuh_indexed_cNotPt;
+
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ return Hexagon::POST_LDriuh_cNotPt;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cdnPt :
+ return Hexagon::LDriub_cPt;
+
+ case Hexagon::LDriub_cdnNotPt :
+ return Hexagon::LDriub_cNotPt;
+
+ case Hexagon::LDriub_indexed_cdnPt :
+ return Hexagon::LDriub_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ return Hexagon::LDriub_indexed_cNotPt;
+
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ return Hexagon::POST_LDriub_cPt;
+
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return Hexagon::POST_LDriub_cNotPt;
+
+ // V4 indexed+scaled Load
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_cPt_V4;
+
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_cNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_cPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_cPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_cPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_cPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ return Hexagon::LDd_GP_cPt_V4;
+
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ return Hexagon::LDd_GP_cNotPt_V4;
+
+ case Hexagon::LDb_GP_cdnPt_V4:
+ return Hexagon::LDb_GP_cPt_V4;
+
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ return Hexagon::LDb_GP_cNotPt_V4;
+
+ case Hexagon::LDub_GP_cdnPt_V4:
+ return Hexagon::LDub_GP_cPt_V4;
+
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ return Hexagon::LDub_GP_cNotPt_V4;
+
+ case Hexagon::LDh_GP_cdnPt_V4:
+ return Hexagon::LDh_GP_cPt_V4;
+
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ return Hexagon::LDh_GP_cNotPt_V4;
+
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ return Hexagon::LDuh_GP_cPt_V4;
+
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ return Hexagon::LDuh_GP_cNotPt_V4;
+
+ case Hexagon::LDw_GP_cdnPt_V4:
+ return Hexagon::LDw_GP_cPt_V4;
+
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ return Hexagon::LDw_GP_cNotPt_V4;
+
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ return Hexagon::LDrid_GP_cPt_V4;
+
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ return Hexagon::LDrid_GP_cNotPt_V4;
+
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ return Hexagon::LDrib_GP_cPt_V4;
+
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ return Hexagon::LDrib_GP_cNotPt_V4;
+
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ return Hexagon::LDriub_GP_cPt_V4;
+
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ return Hexagon::LDriub_GP_cNotPt_V4;
+
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ return Hexagon::LDrih_GP_cPt_V4;
+
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ return Hexagon::LDrih_GP_cNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ return Hexagon::LDriuh_GP_cPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ return Hexagon::LDriuh_GP_cNotPt_V4;
+
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ return Hexagon::LDriw_GP_cPt_V4;
+
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+ return Hexagon::LDriw_GP_cNotPt_V4;
+
+ // Conditional add
+
+ case Hexagon::ADD_ri_cdnPt :
+ return Hexagon::ADD_ri_cPt;
+ case Hexagon::ADD_ri_cdnNotPt :
+ return Hexagon::ADD_ri_cNotPt;
+
+ case Hexagon::ADD_rr_cdnPt :
+ return Hexagon::ADD_rr_cPt;
+ case Hexagon::ADD_rr_cdnNotPt:
+ return Hexagon::ADD_rr_cNotPt;
+
+ // Conditional logical Operations
+
+ case Hexagon::XOR_rr_cdnPt :
+ return Hexagon::XOR_rr_cPt;
+ case Hexagon::XOR_rr_cdnNotPt :
+ return Hexagon::XOR_rr_cNotPt;
+
+ case Hexagon::AND_rr_cdnPt :
+ return Hexagon::AND_rr_cPt;
+ case Hexagon::AND_rr_cdnNotPt :
+ return Hexagon::AND_rr_cNotPt;
+
+ case Hexagon::OR_rr_cdnPt :
+ return Hexagon::OR_rr_cPt;
+ case Hexagon::OR_rr_cdnNotPt :
+ return Hexagon::OR_rr_cNotPt;
+
+ // Conditional Subtract
+
+ case Hexagon::SUB_rr_cdnPt :
+ return Hexagon::SUB_rr_cPt;
+ case Hexagon::SUB_rr_cdnNotPt :
+ return Hexagon::SUB_rr_cNotPt;
+
+ // Conditional combine
+
+ case Hexagon::COMBINE_rr_cdnPt :
+ return Hexagon::COMBINE_rr_cPt;
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ return Hexagon::COMBINE_rr_cNotPt;
+
+  // Conditional shift operations
+
+ case Hexagon::ASLH_cdnPt_V4 :
+ return Hexagon::ASLH_cPt_V4;
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ return Hexagon::ASLH_cNotPt_V4;
+
+ case Hexagon::ASRH_cdnPt_V4 :
+ return Hexagon::ASRH_cPt_V4;
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ return Hexagon::ASRH_cNotPt_V4;
+
+ case Hexagon::SXTB_cdnPt_V4 :
+ return Hexagon::SXTB_cPt_V4;
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ return Hexagon::SXTB_cNotPt_V4;
+
+ case Hexagon::SXTH_cdnPt_V4 :
+ return Hexagon::SXTH_cPt_V4;
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ return Hexagon::SXTH_cNotPt_V4;
+
+ case Hexagon::ZXTB_cdnPt_V4 :
+ return Hexagon::ZXTB_cPt_V4;
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ return Hexagon::ZXTB_cNotPt_V4;
+
+ case Hexagon::ZXTH_cdnPt_V4 :
+ return Hexagon::ZXTH_cPt_V4;
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+ return Hexagon::ZXTH_cNotPt_V4;
+
+ // Store byte
+
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ return Hexagon::STrib_imm_cNotPt_V4;
+
+ case Hexagon::STrib_cdnPt_nv_V4 :
+ case Hexagon::STrib_cPt_nv_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_cdnNotPt_nv_V4 :
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ return Hexagon::STrib_cNotPt;
+
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnPt_nv_V4 :
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cNotPt;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STbri_cdnPt_nv_V4 :
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ return Hexagon::POST_STbri_cNotPt;
+
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ return Hexagon::STrib_GP_cPt_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ return Hexagon::STrib_GP_cNotPt_V4;
+
+ // Store new-value byte - unconditional
+ case Hexagon::STrib_nv_V4:
+ return Hexagon::STrib;
+
+ case Hexagon::STrib_indexed_nv_V4:
+ return Hexagon::STrib_indexed;
+
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ return Hexagon::STrib_indexed_shl_V4;
+
+ case Hexagon::STrib_shl_nv_V4:
+ return Hexagon::STrib_shl_V4;
+
+ case Hexagon::STrib_GP_nv_V4:
+ return Hexagon::STrib_GP_V4;
+
+ case Hexagon::STb_GP_nv_V4:
+ return Hexagon::STb_GP_V4;
+
+ case Hexagon::POST_STbri_nv_V4:
+ return Hexagon::POST_STbri;
+
+ // Store halfword
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ return Hexagon::STrih_imm_cNotPt_V4;
+
+ case Hexagon::STrih_cdnPt_nv_V4 :
+ case Hexagon::STrih_cPt_nv_V4 :
+ case Hexagon::STrih_cdnPt_V4 :
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ return Hexagon::STrih_cNotPt;
+
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_cNotPt;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_SThri_cdnPt_nv_V4 :
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ return Hexagon::POST_SThri_cNotPt;
+
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ return Hexagon::STrih_GP_cPt_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ return Hexagon::STrih_GP_cNotPt_V4;
+
+ // Store new-value halfword - unconditional
+
+ case Hexagon::STrih_nv_V4:
+ return Hexagon::STrih;
+
+ case Hexagon::STrih_indexed_nv_V4:
+ return Hexagon::STrih_indexed;
+
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ return Hexagon::STrih_indexed_shl_V4;
+
+ case Hexagon::STrih_shl_nv_V4:
+ return Hexagon::STrih_shl_V4;
+
+ case Hexagon::STrih_GP_nv_V4:
+ return Hexagon::STrih_GP_V4;
+
+ case Hexagon::STh_GP_nv_V4:
+ return Hexagon::STh_GP_V4;
+
+ case Hexagon::POST_SThri_nv_V4:
+ return Hexagon::POST_SThri;
+
+ // Store word
+
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ return Hexagon::STriw_imm_cPt_V4;
+
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ return Hexagon::STriw_imm_cNotPt_V4;
+
+ case Hexagon::STriw_cdnPt_nv_V4 :
+ case Hexagon::STriw_cPt_nv_V4 :
+ case Hexagon::STriw_cdnPt_V4 :
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ return Hexagon::STriw_cNotPt;
+
+ case Hexagon::STriw_indexed_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_cNotPt;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STwri_cdnPt_nv_V4 :
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ return Hexagon::POST_STwri_cNotPt;
+
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ return Hexagon::STriw_GP_cPt_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ return Hexagon::STriw_GP_cNotPt_V4;
+
+ // Store new-value word - unconditional
+
+ case Hexagon::STriw_nv_V4:
+ return Hexagon::STriw;
+
+ case Hexagon::STriw_indexed_nv_V4:
+ return Hexagon::STriw_indexed;
+
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ return Hexagon::STriw_indexed_shl_V4;
+
+ case Hexagon::STriw_shl_nv_V4:
+ return Hexagon::STriw_shl_V4;
+
+ case Hexagon::STriw_GP_nv_V4:
+ return Hexagon::STriw_GP_V4;
+
+ case Hexagon::STw_GP_nv_V4:
+ return Hexagon::STw_GP_V4;
+
+ case Hexagon::POST_STwri_nv_V4:
+ return Hexagon::POST_STwri;
+
+ // Store doubleword
+
+ case Hexagon::STrid_cdnPt_V4 :
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_cdnNotPt_V4 :
+ return Hexagon::STrid_cNotPt;
+
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_cNotPt;
+
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ return Hexagon::POST_STdri_cNotPt;
+
+ case Hexagon::STd_GP_cdnPt_V4 :
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ return Hexagon::STd_GP_cNotPt_V4;
+
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ return Hexagon::STrid_GP_cPt_V4;
+
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ return Hexagon::STrid_GP_cNotPt_V4;
+ }
+}
+
+bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotOldOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+ return true;
+}
+
+// Returns true if an instruction is predicated on a true predicate (e.g. p0)
+// and false if it is predicated on the complement (e.g. !p0).
+
+static bool GetPredicateSense(MachineInstr* MI,
+ const HexagonInstrInfo *QII) {
+
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrid_cPt :
+ case Hexagon::STrid_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::JMP_cdnPt :
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::ADD_ri_cPt :
+ case Hexagon::ADD_ri_cdnPt :
+ case Hexagon::ADD_rr_cPt :
+ case Hexagon::ADD_rr_cdnPt :
+ case Hexagon::XOR_rr_cPt :
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::AND_rr_cPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::OR_rr_cPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::SUB_rr_cPt :
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cPt :
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::ASLH_cPt_V4 :
+ case Hexagon::ASLH_cdnPt_V4 :
+ case Hexagon::ASRH_cPt_V4 :
+ case Hexagon::ASRH_cdnPt_V4 :
+ case Hexagon::SXTB_cPt_V4 :
+ case Hexagon::SXTB_cdnPt_V4 :
+ case Hexagon::SXTH_cPt_V4 :
+ case Hexagon::SXTH_cdnPt_V4 :
+ case Hexagon::ZXTB_cPt_V4 :
+ case Hexagon::ZXTB_cdnPt_V4 :
+ case Hexagon::ZXTH_cPt_V4 :
+ case Hexagon::ZXTH_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ return true;
+
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cNotPt :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrid_cNotPt :
+ case Hexagon::STrid_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::JMP_cdnNotPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::ADD_ri_cNotPt :
+ case Hexagon::ADD_ri_cdnNotPt :
+ case Hexagon::ADD_rr_cNotPt :
+ case Hexagon::ADD_rr_cdnNotPt :
+ case Hexagon::XOR_rr_cNotPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cNotPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cNotPt :
+ case Hexagon::OR_rr_cdnNotPt :
+ case Hexagon::SUB_rr_cNotPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+ case Hexagon::COMBINE_rr_cNotPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ case Hexagon::ASLH_cNotPt_V4 :
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ case Hexagon::ASRH_cNotPt_V4 :
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ case Hexagon::SXTB_cNotPt_V4 :
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ case Hexagon::SXTH_cNotPt_V4 :
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ case Hexagon::ZXTB_cNotPt_V4 :
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ case Hexagon::ZXTH_cNotPt_V4 :
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+ return false;
+
+ default:
+ assert (false && "Unknown predicate sense of the instruction");
+ }
+ // return *some value* to avoid compiler warning
+ return false;
+}
+
+bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
+ if (isNewValueInst(MI))
+ return true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+
+  // Conditional add
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+
+ // Conditional logical operations
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::OR_rr_cdnNotPt :
+
+  // Conditional subtract
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+
+ // Conditional shift operations
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+
+ // Conditional stores
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+
+ // Store word conditionally
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ case Hexagon::LDb_GP_cdnPt_V4:
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ case Hexagon::LDub_GP_cdnPt_V4:
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ case Hexagon::LDh_GP_cdnPt_V4:
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ case Hexagon::LDw_GP_cdnPt_V4:
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+
+ case Hexagon::STrid_GP_cdnPt_V4:
+ case Hexagon::STrid_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STd_GP_cdnPt_V4:
+ case Hexagon::STd_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+
+ return true;
+ }
+ return false;
+}
+
+static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
+ const HexagonInstrInfo *QII) {
+ assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+  // A post-increment instruction defines and uses the same address register,
+  // so look for the register that appears as both a def and a use. Caution:
+  // DenseMap initializes with a minimum of 64 buckets, whereas a
+  // post-increment instruction has at most 5 operands.
+ DenseMap<unsigned, unsigned> DefRegsSet;
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isDef()) {
+ DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
+ }
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isUse()) {
+ if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
+ return MI->getOperand(opNum);
+ }
+ }
+#else
+ if (MI->getDesc().mayLoad()) {
+    // The second operand is always the post-increment operand in a load.
+    assert(MI->getOperand(1).isReg() &&
+                "Post increment operand has to be a register.");
+ return (MI->getOperand(1));
+ }
+ if (MI->getDesc().mayStore()) {
+    // The first operand is always the post-increment operand in a store.
+    assert(MI->getOperand(0).isReg() &&
+                "Post increment operand has to be a register.");
+ return (MI->getOperand(0));
+ }
+#endif
+  // We should never get here.
+ llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// Get the value being stored.
+static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+  // The value being stored is always the last operand.
+ return (MI->getOperand(MI->getNumOperands()-1));
+}
+
+// Can this store be converted to a new-value store?
+// The following restrictions must be respected when converting a store into
+// a new-value store.
+// 1. If an instruction uses auto-increment, its address register cannot
+//    be a new-value register. Arch Spec 5.4.2.1.
+// 2. If an instruction uses absolute-set addressing mode, its address
+//    register cannot be a new-value register. Arch Spec 5.4.2.1.
+//    TODO: This is not enabled, as absolute-set addressing mode patterns
+//    are not implemented.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+//    as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets a new-value register is conditional, then
+//    the instruction that uses the new-value register must also be
+//    conditional, and both must always have their predicates evaluate
+//    identically. Arch Spec 5.4.2.3.
+// 5. There is an implied restriction that a packet cannot have another store
+//    if it contains a new-value store. Corollary: if there is already a store
+//    in a packet, a new-value store cannot be added. Arch Spec 3.4.4.2.
+bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
+ MachineInstr *PacketMI, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit)
+{
+  // Make sure we are looking at a store that can be newified.
+  if (!IsNewifyStore(MI))
+    return false;
+
+  // Make sure there is a dependency, and that it can be new-valued.
+ if (GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() != DepReg)
+ return false;
+
+ const HexagonRegisterInfo* QRI = (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const MCInstrDesc& MCID = PacketMI->getDesc();
+ // first operand is always the result
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI);
+
+  // If there is already a store in the packet, a new-value store cannot be
+  // added. Arch Spec 3.4.4.2.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
+ // then we don't need this
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+ return false;
+ }
+
+ if (PacketRC == Hexagon::DoubleRegsRegisterClass) {
+    // New-value store constraint: double registers cannot feed into a
+    // new-value store. Arch Spec 5.4.2.2.
+ return false;
+ }
+
+ // Make sure it's NOT the post increment register that we are going to
+ // new value.
+ if (QII->isPostIncrement(MI) &&
+ MI->getDesc().mayStore() &&
+ GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ return false;
+ }
+
+ if (QII->isPostIncrement(PacketMI) &&
+ PacketMI->getDesc().mayLoad() &&
+ GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
+    // If the source is post-increment (or uses absolute-set addressing),
+    // it cannot feed into a new-value store, e.g.:
+    //   r3 = memw(r2++#4)
+    //   memw(r30 + #-1404) = r2.new -> cannot be a new-value store
+    // Arch Spec 5.4.2.1.
+ return false;
+ }
+
+  // If the source that feeds the store is predicated, the new-value store
+  // must also be predicated.
+ if (QII->isPredicated(PacketMI)) {
+ if (!QII->isPredicated(MI))
+ return false;
+
+ // Check to make sure that they both will have their predicates
+ // evaluate identically
+ unsigned predRegNumSrc;
+ unsigned predRegNumDst;
+ const TargetRegisterClass* predRegClass;
+
+    // Get the predicate register used in the source instruction.
+    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+      if (PacketMI->getOperand(opNum).isReg()) {
+        predRegNumSrc = PacketMI->getOperand(opNum).getReg();
+        predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
+        if (predRegClass == Hexagon::PredRegsRegisterClass)
+          break;
+      }
+    }
+ assert ((predRegClass == Hexagon::PredRegsRegisterClass ) &&
+ ("predicate register not found in a predicated PacketMI instruction"));
+
+    // Get the predicate register used in the new-value store instruction.
+    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+      if (MI->getOperand(opNum).isReg()) {
+        predRegNumDst = MI->getOperand(opNum).getReg();
+        predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
+        if (predRegClass == Hexagon::PredRegsRegisterClass)
+          break;
+      }
+    }
+ assert ((predRegClass == Hexagon::PredRegsRegisterClass ) &&
+ ("predicate register not found in a predicated MI instruction"));
+
+ // New-value register producer and user (store) need to satisfy these
+ // constraints:
+ // 1) Both instructions should be predicated on the same register.
+ // 2) If producer of the new-value register is .new predicated then store
+ // should also be .new predicated and if producer is not .new predicated
+ // then store should not be .new predicated.
+    // 3) Both the new-value register producer and the user should have the
+    //    same predicate sense, i.e., either both are negated or neither is.
+
+ if (( predRegNumDst != predRegNumSrc) ||
+ isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
+ return false;
+ }
+ }
+
+  // Make sure that, other than the new-value register, no other register used
+  // by the store instruction has been modified in the same packet. Predicate
+  // registers can be modified, but they should not be modified between the
+  // producer and the store instruction, as that would make the two
+  // conditional on different values. We already know this to be true for all
+  // the instructions before and including PacketMI. However, we need to
+  // perform the check for the remaining instructions in the packet.
+
+ std::vector<MachineInstr*>::iterator VI;
+ std::vector<MachineInstr*>::iterator VE;
+ unsigned StartCheck = 0;
+
+ for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* TempSU = MIToSUnit[*VI];
+ MachineInstr* TempMI = TempSU->getInstr();
+
+ // Following condition is true for all the instructions until PacketMI is
+ // reached (StartCheck is set to 0 before the for loop).
+ // StartCheck flag is 1 for all the instructions after PacketMI.
+ if (TempMI != PacketMI && !StartCheck) // start processing only after
+ continue; // encountering PacketMI
+
+ StartCheck = 1;
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ continue;
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(), QRI))
+ return false;
+ }
+ }
+
+  // Make sure that, for non-post-increment stores:
+  // 1. The only use of the register is DepReg and no other registers.
+  //    This handles V4 base+index registers.
+  //    The following store cannot be dot-new:
+  //    E.g.  r0 = add(r0, #3)
+  //          memw(r1+r0<<#2) = r0
+ if (!QII->isPostIncrement(MI) &&
+ GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() == DepReg) {
+ for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).getReg() == DepReg) {
+ return false;
+ }
+ }
+    // 2. If the data definition comes from an implicit definition of the
+    //    register, do not newify the store. E.g.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if (PacketMI->getOperand(opNum).isReg() &&
+ PacketMI->getOperand(opNum).getReg() == DepReg &&
+ PacketMI->getOperand(opNum).isDef() &&
+ PacketMI->getOperand(opNum).isImplicit()) {
+ return false;
+ }
+ }
+ }
+
+  // This can be a dot-new store.
+ return true;
+}
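
The legality rules above (Arch Spec 5.4.2.1-5.4.2.3 and 3.4.4.2) boil down to a
few boolean conditions. The following is a minimal, self-contained sketch of
that decision over a hypothetical simplified instruction record; the ToyInsn
struct and its fields are invented for the example and are not the data
structures used by the pass.

  // Minimal sketch of the new-value store checks, over a made-up record type.
  #include <vector>

  struct ToyInsn {
    bool IsStore = false, Is64BitDef = false, IsPostInc = false;
    bool Predicated = false, PredSense = false, DotNew = false;
    unsigned PredReg = 0, AddrReg = 0, DefReg = 0;
  };

  // Can 'Store' consume 'Producer's result as a .new value, given the other
  // instructions already placed in the packet?
  static bool canNewValueStore(const ToyInsn &Producer, const ToyInsn &Store,
                               const std::vector<ToyInsn> &Packet) {
    for (const ToyInsn &I : Packet)
      if (I.IsStore)                 // Rule 5: only one store per packet when
        return false;                // one of them is a new-value store.
    if (Producer.Is64BitDef)         // Rule 3: 64-bit producers cannot feed it.
      return false;
    if (Store.IsPostInc && Store.AddrReg == Producer.DefReg)
      return false;                  // Rule 1: address reg cannot be newified.
    if (Producer.Predicated &&       // Rule 4: predicates must match exactly.
        (!Store.Predicated || Producer.PredReg != Store.PredReg ||
         Producer.PredSense != Store.PredSense ||
         Producer.DotNew != Store.DotNew))
      return false;
    return true;
  }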
+
+// Can this MI be promoted to either a new-value store or a new-value jump?
+bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII)
+{
+
+ const HexagonRegisterInfo* QRI = (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ if (!QRI->Subtarget.hasV4TOps() ||
+ !IsNewifyStore(MI))
+ return false;
+
+ MachineInstr *PacketMI = PacketSU->getInstr();
+
+  // Check to see if the store can be new-valued.
+  if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+    return true;
+
+  // Checking whether the compare/jump can be new-valued is done in a
+  // separate pass; there is no need to check it here.
+ return false;
+}
+
+// Check to see if an instruction can be dot-new. There are three kinds:
+// 1. dot-new on a predicate - V2/V3/V4
+// 2. dot-new on stores, NV/ST - V4
+// 3. dot-new on jumps, NV/J - V4 -- this is generated in a separate pass.
+bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC )
+{
+ // already a dot new instruction
+ if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ return false;
+
+ if (!isNewifiable(MI))
+ return false;
+
+ // predicate .new
+ if (RC == Hexagon::PredRegsRegisterClass && isCondInst(MI))
+ return true;
+ else if (RC != Hexagon::PredRegsRegisterClass &&
+ !IsNewifyStore(MI)) // MI is not a new-value store
+ return false;
+ else {
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotNewOp(MI->getOpcode());
+ const MCInstrDesc &desc = QII->get(NewOpcode);
+ DebugLoc dl;
+ MachineInstr *NewMI = MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
+
+ if (!ResourcesAvailable)
+ return false;
+
+    // New-value stores only; new-value jumps are generated in a separate pass.
+ if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Go through the packet instructions and search for an anti-dependency
+// between them and DepReg from MI.
+// Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cdNotPt %P3, 2
+// to this packet:
+// {
+// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
+// c) %P3<def> = TFR_PdRs %R23
+// d) %R1<def> = TFRI_cdnPt %P3, 4
+// }
+// The P3 from a) and d) will be complements after a)'s P3 is converted to
+// .new form. The anti-dependency between c) and b) is irrelevant for this
+// case.
+bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ SUnit* PacketSUDep = MIToSUnit[MI];
+
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+    // We only care about dependencies on predicated instructions.
+ if(!QII->isPredicated(*VIN)) continue;
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // Look at dependencies between current members of the packet
+ // and predicate defining instruction MI.
+ // Make sure that dependency is on the exact register
+ // we care about.
+ if (PacketSU->isSucc(PacketSUDep)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
+ (PacketSU->Succs[i].getKind() == SDep::Anti) &&
+ (PacketSU->Succs[i].getReg() == DepReg)) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements
+bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
+ MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Currently can only reason about conditional transfers
+ if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) {
+ return false;
+ }
+
+ // Scheduling unit for candidate
+ SUnit* SU = MIToSUnit[MI1];
+
+ // One corner case deals with the following scenario:
+ // Trying to add
+ // a) %R24<def> = TFR_cPt %P0, %R25
+ // to this packet:
+ //
+ // {
+ // b) %R25<def> = TFR_cNotPt %P0, %R24
+ // c) %P0<def> = CMPEQri %R26, 1
+ // }
+ //
+  // On a general check, a) and b) are complements, but the presence of c)
+  // will convert a) to .new form, and then it is no longer a complement.
+  // We attempt to detect this by analyzing existing dependencies in the
+  // packet.
+
+ // Analyze relationships between all existing members of the packet.
+  // Look for an anti-dependency on the same predicate register as the one
+  // used in the candidate.
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+        // The corner case exists when there is a true data dependency
+        // between the candidate and one of the current packet members, the
+        // dependency is on a predicate register, and there already exists an
+        // anti-dependency on the same predicate in the packet.
+ if (PacketSU->Succs[i].getSUnit() == SU &&
+ Hexagon::PredRegsRegisterClass->contains(
+ PacketSU->Succs[i].getReg()) &&
+ PacketSU->Succs[i].getKind() == SDep::Data &&
+            // Here we know that *VIN is a predicate-setting instruction
+            // with a true data dependency to the candidate on the register
+            // we care about - c) in the above example.
+            // Now we need to see if there is an anti-dependency from c) to
+            // any other instruction in the same packet on the predicate
+            // register of interest.
+ RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
+ MIToSUnit)) {
+ return false;
+ }
+ }
+ }
+ }
+
+  // If the above case does not apply, check the regular complement
+  // condition: the predicate register must be the same and the predicate
+  // sense must differ. We also need to differentiate .old vs. .new:
+  // !p0 is not complementary to p0.new.
+ return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
+ (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
+ (isDotNewInst(MI1) == isDotNewInst(MI2)));
+}
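
As a rough, standalone illustration of the final complement test above (same
predicate register, opposite sense, matching .old/.new form), here is a small
sketch; the PredInfo record is invented for the example and is not part of the
pass.

  struct PredInfo {
    unsigned PredReg;   // predicate register the transfer is conditional on
    bool Sense;         // true for "if (p)", false for "if (!p)"
    bool DotNew;        // predicated on p.new rather than p
  };

  // Complements: same register, opposite sense, and both .old or both .new
  // (e.g. "!p0" is not the complement of "p0.new").
  static bool areComplements(const PredInfo &A, const PredInfo &B) {
    return A.PredReg == B.PredReg && A.Sense != B.Sense && A.DotNew == B.DotNew;
  }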
+
+// initPacketizerState - Initialize packetizer flags
+void HexagonPacketizerList::initPacketizerState() {
+
+ Dependence = false;
+ PromotedToDotNew = false;
+ GlueToNewValueJump = false;
+ GlueAllocframeStore = false;
+ FoundSequentialDependence = false;
+
+ return;
+}
+
+// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (MI->isDebugValue())
+ return true;
+
+ // We must print out inline assembly
+ if (MI->isInlineAsm())
+ return false;
+
+ // We check if MI has any functional units mapped to it.
+ // If it doesn't, we ignore the instruction.
+ const MCInstrDesc& TID = MI->getDesc();
+ unsigned SchedClass = TID.getSchedClass();
+ const InstrStage* IS = ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ unsigned FuncUnits = IS->getUnits();
+ return !FuncUnits;
+}
+
+// isSoloInstruction - Returns true for instructions that must be
+// scheduled in their own packet.
+bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+
+ if (MI->isInlineAsm())
+ return true;
+
+ if (MI->isEHLabel())
+ return true;
+
+ // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
+ // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
+ // They must not be grouped with other instructions in a packet.
+ if (IsSchedBarrier(MI))
+ return true;
+
+ return false;
+}
+
+// isLegalToPacketizeTogether:
+// SUI is the current instruction that is outside of the current packet.
+// SUJ is the current instruction inside the current packet against which
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ const MCInstrDesc &MCIDI = I->getDesc();
+ const MCInstrDesc &MCIDJ = J->getDesc();
+
+ MachineBasicBlock::iterator II = I;
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ const HexagonRegisterInfo* QRI = (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ // Inline asm cannot go in the packet.
+ if (I->getOpcode() == Hexagon::INLINEASM)
+ llvm_unreachable("Should not meet inline asm here!");
+
+ if (isSoloInstruction(I))
+ llvm_unreachable("Should not meet solo instr here!");
+
+  // A call that saves the callee-saved registers can only be in a packet
+  // with instructions that don't write to the callee-saved registers.
+ if ((QII->isSaveCalleeSavedRegsCall(I) &&
+ DoesModifyCalleeSavedReg(J, QRI)) ||
+ (QII->isSaveCalleeSavedRegsCall(J) &&
+ DoesModifyCalleeSavedReg(I, QRI))) {
+ Dependence = true;
+ return false;
+ }
+
+ // Two control flow instructions cannot go in the same packet.
+ if (IsControlFlow(I) && IsControlFlow(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ // A LoopN instruction cannot appear in the same packet as a jump or call.
+ if (IsLoopN(I) && ( IsDirectJump(J)
+ || MCIDJ.isCall()
+ || QII->isDeallocRet(J))) {
+ Dependence = true;
+ return false;
+ }
+ if (IsLoopN(J) && ( IsDirectJump(I)
+ || MCIDI.isCall()
+ || QII->isDeallocRet(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ // dealloc_return cannot appear in the same packet as a conditional or
+ // unconditional jump.
+ if (QII->isDeallocRet(I) && ( MCIDJ.isBranch()
+ || MCIDJ.isCall()
+ || MCIDJ.isBarrier())) {
+ Dependence = true;
+ return false;
+ }
+
+
+  // V4 allows dual stores, but does not allow a second store if the first
+  // store is not in SLOT0. New-value stores, new-value jumps, dealloc_return
+  // and memops always take SLOT0. Arch Spec 3.4.4.2.
+ if (QRI->Subtarget.hasV4TOps()) {
+
+ if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
+
+    // A dealloc_return cannot be packetized with a store.
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)){
+ Dependence = true;
+ return false;
+ }
+
+ // If an instruction feeds new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ MachineInstr *NextMI = NextMII;
+
+ if (QII->isNewValueJump(NextMI)) {
+
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
+
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
+
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
+
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+
+        // An NVJ cannot be part of a dual jump - Arch Spec: section 7.8.
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
+ }
+ }
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
+ }
+ }
+
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0;
+ (i < SUJ->Succs.size()) && !FoundSequentialDependence;
+ ++i) {
+
+ if (SUJ->Succs[i].getSUnit() != SUI) {
+ continue;
+ }
+
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+
+ // For direct calls:
+ // Ignore register dependences for call instructions for
+ // packetization purposes except for those due to r31 and
+ // predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+        // TODO: Currently, jumpr handles only a return through r31, so the
+        // following logic (specifically IsCallDependent) works fine.
+        // Once jumpr is enabled for registers other than r31, the last part
+        // of IsCallDependent, where it handles indirect calls, needs to be
+        // reworked. Bug 6216 is opened for this.
+ //
+ unsigned DepReg;
+ const TargetRegisterClass* RC;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = QRI->getMinimalPhysRegClass(DepReg);
+ }
+ if ((MCIDI.isCall() || MCIDI.isReturn()) &&
+ (!IsRegDependence(DepType) ||
+ !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
+ /* do nothing */
+ }
+
+ // For instructions that can be promoted to dot-new, try to promote.
+ else if ((DepType == SDep::Data) &&
+ CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
+ PromoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ /* do nothing */
+ }
+
+ else if ((DepType == SDep::Data) &&
+ (QII->isNewValueJump(I))) {
+ /* do nothing */
+ }
+
+ // For predicated instructions, if the predicates are complements
+ // then there can be no dependence.
+ else if (QII->isPredicated(I) &&
+ QII->isPredicated(J) &&
+ ArePredicatesComplements(I, J, MIToSUnit)) {
+ /* do nothing */
+
+ }
+ else if (IsDirectJump(I) &&
+ !MCIDJ.isBranch() &&
+ !MCIDJ.isCall() &&
+ (DepType == SDep::Order)) {
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions
+ /* do nothing */
+ }
+ else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
+ (DepType != SDep::Output)) {
+ // Ignore all dependences for jumps except for true and output
+ // dependences
+ /* do nothing */
+ }
+
+      // Ignore output dependences due to superregs. We can, for instance,
+      // write to two different subregisters of R1:0 in the same cycle.
+      //
+      // If neither I nor J defines DepReg, then this is a superfluous
+      // output dependence. The dependence must be of the form:
+      //   R0 = ...
+      //   R1 = ...
+      // and there is an output dependence between the two instructions
+      // with
+      //   DepReg = D0
+      // We want to ignore these dependences.
+      // Ideally, the dependence constructor should annotate such
+      // dependences. We can then avoid this relatively expensive check.
+      //
+ else if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check if I and J really defines DepReg.
+ if (I->definesRegister(DepReg) ||
+ J->definesRegister(DepReg)) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // We ignore Order dependences for
+ // 1. Two loads unless they are volatile.
+ // 2. Two stores in V4 unless they are volatile.
+ else if ((DepType == SDep::Order) &&
+ !I->hasVolatileMemoryRef() &&
+ !J->hasVolatileMemoryRef()) {
+ if (QRI->Subtarget.hasV4TOps() &&
+ // hexagonv4 allows dual store.
+ MCIDI.mayStore() && MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+        // store followed by store -- not OK on V2
+ // store followed by load -- not OK on all (OK if addresses
+ // are not aliased)
+ // load followed by store -- OK on all
+ // load followed by load -- OK on all
+ else if ( !MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ else {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+        // For V4, special-case ALLOCFRAME. Even though there is a dependency
+        // between ALLOCFRAME and a subsequent store, allow them to be
+        // packetized in the same packet. This implies that the store is using
+        // the caller's SP. Hence, the offset needs to be updated accordingly.
+ else if (DepType == SDep::Data
+ && QRI->Subtarget.hasV4TOps()
+ && J->getOpcode() == Hexagon::ALLOCFRAME
+ && (I->getOpcode() == Hexagon::STrid
+ || I->getOpcode() == Hexagon::STriw
+ || I->getOpcode() == Hexagon::STrib)
+ && I->getOperand(0).getReg() == QRI->getStackRegister()
+ && QII->isValidOffset(I->getOpcode(),
+ I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE)))
+ {
+ GlueAllocframeStore = true;
+          // Since this store is to be glued with allocframe in the same
+          // packet, it will use the SP of the previous stack frame, i.e.,
+          // the caller's SP. Therefore, we need to recalculate the offset
+          // accordingly.
+ I->getOperand(1).setImm(I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE));
+ }
+
+ //
+ // Skip over anti-dependences. Two instructions that are
+ // anti-dependent can share a packet
+ //
+ else if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+ }
+
+ return true;
+}
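
The allocframe glue above rebases the store's offset onto the caller's SP, and
isLegalToPruneDependencies below undoes that if the packet is later rejected.
A tiny worked example of the arithmetic, with made-up numbers (the constants
here merely stand in for the frame size and HEXAGON_LRFP_SIZE):

  #include <cassert>

  int main() {
    const int FrameSize = 24;  // stand-in for MF.getFrameInfo()->getStackSize()
    const int LRFPSize  = 8;   // stand-in for HEXAGON_LRFP_SIZE (saved LR + FP)
    const int OrigImm   = 16;  // store offset relative to the callee's SP

    // Glued with allocframe: rebase the offset onto the caller's SP.
    const int GluedImm = OrigImm - (FrameSize + LRFPSize);   // == -16
    // Packet later rejected: restore the original callee-SP-relative offset.
    const int Restored = GluedImm + (FrameSize + LRFPSize);  // == 16
    assert(Restored == OrigImm);
    return 0;
  }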
+
+// isLegalToPruneDependencies
+bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ if (Dependence) {
+
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew) {
+ DemoteToDotOld(I);
+ }
+
+    // Check if the instruction (must be a store) was glued with an allocframe
+    // instruction. If so, restore its offset to its original value, i.e. use
+    // the current SP instead of the caller's SP.
+ if (GlueAllocframeStore) {
+ I->getOperand(1).setImm(I->getOperand(1).getImm() +
+ FrameSize + HEXAGON_LRFP_SIZE);
+ }
+
+ return false;
+ }
+ return true;
+}
+
+MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (GlueToNewValueJump) {
+
+ ++MII;
+ MachineInstr *nvjMI = MII;
+ assert(ResourceTracker->canReserveResources(MI));
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(MI) &&
+ !tryAllocateResourcesForConstExt(MI)) {
+ endPacket(MBB, MI);
+ ResourceTracker->reserveResources(MI);
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(MI);
+ // Reserve resources for new value jump constant extender.
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(nvjMI);
+ assert(ResourceTracker->canReserveResources(nvjMI) &&
+ "Ensure that there is a slot");
+
+ } else if ( // Extended instruction takes two slots in the packet.
+ // Try reserve and allocate 4-byte in the current packet first.
+ (QII->isExtended(nvjMI)
+ && (!tryAllocateResourcesForConstExt(nvjMI)
+ || !ResourceTracker->canReserveResources(nvjMI)))
+ || // For non-extended instruction, no need to allocate extra 4 bytes.
+ (!QII->isExtended(nvjMI) && !ResourceTracker->canReserveResources(nvjMI)))
+ {
+ endPacket(MBB, MI);
+      // A new and empty packet starts.
+      // We are sure that the resource requirements can be satisfied.
+      // Therefore, we do not need to call "canReserveResources" anymore.
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(nvjMI))
+ reserveResourcesForConstExt(nvjMI);
+ }
+ // Here, we are sure that "reserveResources" would succeed.
+ ResourceTracker->reserveResources(nvjMI);
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(nvjMI);
+ } else {
+ if ( QII->isExtended(MI)
+ && ( !tryAllocateResourcesForConstExt(MI)
+ || !ResourceTracker->canReserveResources(MI)))
+ {
+ endPacket(MBB, MI);
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old
+ if (PromotedToDotNew) {
+ DemoteToDotOld(MI);
+ }
+ reserveResourcesForConstExt(MI);
+ }
+    // In case "MI" is not an extended insn, the resource availability has
+    // already been checked.
+ ResourceTracker->reserveResources(MI);
+ CurrentPacketMIs.push_back(MI);
+ }
+ return MII;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonPacketizer() {
+ return new HexagonPacketizer();
+}
+
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 000000000000..9305c2702fa4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. It is adapted from the target-independent version, but also
+// handles calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+ unsigned ByValSize = 0;
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+  // Only assign registers for named (non-varargs) arguments.
+ if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+ NonVarArgsParams))) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+  // If it's passed by value, then we need the size of the aggregate, not of
+  // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
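
Both conventions above follow the same shape: 32-bit values try r0-r5, 64-bit
values try d0-d2, and anything that does not get a register (including all
anonymous varargs, via the ForceMem/NonVarArgsParams checks) falls back to an
aligned stack slot. The sketch below models only that shape; the types and
register names are placeholders, not the real CCState API.

  #include <optional>
  #include <string>
  #include <vector>

  struct Loc { std::optional<std::string> Reg; unsigned StackOffset = 0; };

  // BitWidths[i] is 32 or 64; arguments with index >= NumNamed are varargs
  // and are always forced to memory.
  static std::vector<Loc> assignLocs(const std::vector<unsigned> &BitWidths,
                                     unsigned NumNamed) {
    static const char *R32[] = {"r0", "r1", "r2", "r3", "r4", "r5"};
    static const char *R64[] = {"d0", "d1", "d2"};
    unsigned NextR = 0, NextD = 0, SP = 0;
    std::vector<Loc> Locs;
    for (unsigned i = 0; i < BitWidths.size(); ++i) {
      if (i < NumNamed && BitWidths[i] == 32 && NextR < 6) {
        Locs.push_back({R32[NextR++], 0});
      } else if (i < NumNamed && BitWidths[i] == 64 && NextD < 3) {
        Locs.push_back({R64[NextD++], 0});
      } else {
        unsigned Size = BitWidths[i] / 8;
        SP = (SP + Size - 1) & ~(Size - 1);   // align the slot to its size
        Locs.push_back({std::nullopt, SP});
        SP += Size;
      }
    }
    return Locs;
  }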
diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..cb106a884432
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMHexagonAsmPrinter
+ HexagonInstPrinter.cpp
+ )
+
+add_dependencies(LLVMHexagonAsmPrinter HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
new file mode 100644
index 000000000000..75d6bfb0813a
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -0,0 +1,198 @@
+//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonInstPrinter.h"
+#include "HexagonMCInst.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "HexagonGenAsmWriter.inc"
+
+StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return MII.getName(Opcode);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
+}
+
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInst((const HexagonMCInst*)(MI), O, Annot);
+}
+
+void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ const char packetPadding[] = " ";
+ const char startPacket = '{',
+ endPacket = '}';
+ // TODO: add outer HW loop when it's supported too.
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+    // Ending a hardware loop is different from ending a regular packet.
+ assert(MI->isEndPacket() && "Loop end must also end the packet");
+
+ if (MI->isStartPacket()) {
+ // There must be a packet to end a loop.
+ // FIXME: when shuffling is always run, this shouldn't be needed.
+ HexagonMCInst Nop;
+ StringRef NoAnnot;
+
+ Nop.setOpcode (Hexagon::NOP);
+ Nop.setStartPacket (MI->isStartPacket());
+ printInst (&Nop, O, NoAnnot);
+ }
+
+ // Close the packet.
+ if (MI->isEndPacket())
+ O << packetPadding << endPacket;
+
+ printInstruction(MI, O);
+ }
+ else {
+ // Prefix the insn opening the packet.
+ if (MI->isStartPacket())
+ O << packetPadding << startPacket << '\n';
+
+ printInstruction(MI, O);
+
+ // Suffix the insn closing the packet.
+ if (MI->isEndPacket())
+ // Suffix the packet in a new line always, since the GNU assembler has
+ // issues with a closing brace on the same line as CONST{32,64}.
+ O << '\n' << packetPadding << endPacket;
+ }
+
+ printAnnotation(O, Annot);
+}
+
+void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if (MO.isImm()) {
+ printImmOperand(MI, OpNo, O);
+ } else {
+ assert(false && "Unknown operand");
+ }
+}
+
+void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -1;
+}
+
+void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg());
+ O << " + #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight-byte displacement from the PC.
+ assert(false && "Unknown branch operand.");
+}
+
+void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, bool hi) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ O << '#' << (hi? "HI": "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ assert(false && "Unknown symbol operand");
+ printOperand(MI, OpNo, O);
+ }
+ O << ')';
+}
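The packet handling above makes the printer responsible for the '{' and '}' markup: a start-of-packet instruction is prefixed by an opening brace on its own line, an end-of-packet instruction is followed by a closing brace, and ENDLOOP0 has the brace printed before it. A hedged usage sketch (MAI, MII and MRI are the usual MC info objects; setEndPacket is assumed to be the setter matching the isEndPacket query used above):

    // Illustrative only: print a single-instruction packet.
    HexagonInstPrinter Printer(MAI, MII, MRI);
    HexagonMCInst Nop;
    Nop.setOpcode(Hexagon::NOP);
    Nop.setStartPacket(true);   // opens "{"
    Nop.setEndPacket(true);     // assumed setter; closes "}"
    Printer.printInst(&Nop, llvm::outs(), "");
    // Prints roughly:
    //         {
    //           nop
    //         }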
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
new file mode 100644
index 000000000000..3ce7dfcbdbe2
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -0,0 +1,75 @@
+//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONINSTPRINTER_H
+#define HEXAGONINSTPRINTER_H
+
+#include "HexagonMCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class HexagonInstPrinter : public MCInstPrinter {
+ public:
+ explicit HexagonInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ StringRef getRegName(unsigned RegNo) const;
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, true); }
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, false); }
+
+ bool isConstExtended(const MCInst *MI) const;
+ protected:
+ void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
+ const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..8678401feee4
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonAsmPrinter
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Hexagon/InstPrinter/Makefile
index c49cfbe69077..20331d8807ec 100644
--- a/lib/Target/Blackfin/TargetInfo/Makefile
+++ b/lib/Target/Hexagon/InstPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/InstPrinter/Makefile -------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,9 +7,9 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinInfo
+LIBRARYNAME = LLVMHexagonAsmPrinter
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' Hexagon target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 000000000000..c6d419a91058
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..c9c5a6eadf88
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+ HexagonMCAsmInfo.cpp
+ HexagonMCTargetDesc.cpp
+ )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
new file mode 100644
index 000000000000..7221e906342e
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Hexagon target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONBASEINFO_H
+#define HEXAGONBASEINFO_H
+
+namespace llvm {
+
+/// HexagonII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace HexagonII {
+ // *** The code below must match HexagonInstrFormat*.td *** //
+
+ // Insn types.
+ // *** Must match HexagonInstrFormat*.td ***
+ enum Type {
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeMARKER = 31 // Such as end of a HW loop.
+ };
+
+
+
+ // MCInstrDesc TSFlags
+ // *** Must match HexagonInstrFormat*.td ***
+ enum {
+ // This 5-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x1f,
+
+ // Solo instructions.
+ SoloPos = 5,
+ SoloMask = 0x1,
+
+ // Predicated instructions.
+ PredicatedPos = 6,
+ PredicatedMask = 0x1
+ };
+
+ // *** The code above must match HexagonInstrFormat*.td *** //
+
+} // End namespace HexagonII.
+
+} // End namespace llvm.
+
+#endif
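The Pos/Mask pairs describe how MCInstrDesc::TSFlags is packed: bits 0-4 hold the instruction type, bit 5 the solo flag, bit 6 the predicated flag. A minimal decoding sketch (the helper functions are illustrative; only the enum values come from the patch):

    // Illustrative only: recover the fields from a TSFlags word.
    static HexagonII::Type getInsnType(uint64_t TSFlags) {
      return static_cast<HexagonII::Type>(
          (TSFlags >> HexagonII::TypePos) & HexagonII::TypeMask);
    }
    static bool isSolo(uint64_t TSFlags) {
      return (TSFlags >> HexagonII::SoloPos) & HexagonII::SoloMask;
    }
    static bool isPredicated(uint64_t TSFlags) {
      return (TSFlags >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
    }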
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 000000000000..d6e6c36af5de
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // There is no 64-bit data directive.
+ CommentString = "//";
+ HasLEB128 = true;
+
+ PrivateGlobalPrefix = ".L";
+ LCOMMDirectiveType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+ WeakRefDirective = "\t.weak\t";
+
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index 837844bd29a9..d336cd5be917 100644
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties ------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the AlphaMCAsmInfo class.
+// This file contains the declaration of the HexagonMCAsmInfo class.
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHATARGETASMINFO_H
-#define ALPHATARGETASMINFO_H
+#ifndef HEXAGONMCASMINFO_H
+#define HEXAGONMCASMINFO_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -20,8 +20,9 @@
namespace llvm {
class Target;
- struct AlphaMCAsmInfo : public MCAsmInfo {
- explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 000000000000..3cfa4fddd87c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,95 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+ // VirtualFP = (R30 + #0).
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Hexagon::R30, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For now, use static relocations; PIC is not yet supported, so the
+ // requested relocation model is ignored.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+  TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+                                      createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
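These factory functions are not called directly by clients; LLVMInitializeHexagonTargetMC() hangs them off the TheHexagonTarget registry entry (declared via LLVMInitializeHexagonTargetInfo() further down), and tools retrieve them by triple. A rough sketch of the consumer side, with an illustrative triple string and error handling:

    // Illustrative only: look up the registered target and build its MC objects.
    std::string Error;
    const Target *T = TargetRegistry::lookupTarget("hexagon", Error);
    if (!T) report_fatal_error(Error);
    OwningPtr<MCRegisterInfo>  MRI(T->createMCRegInfo("hexagon"));
    OwningPtr<MCInstrInfo>     MII(T->createMCInstrInfo());
    OwningPtr<MCSubtargetInfo> STI(T->createMCSubtargetInfo("hexagon", "", ""));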
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index b0619e6cb011..2238b1ae5f35 100644
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,34 +7,33 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Alpha specific target descriptions.
+// This file provides Hexagon specific target descriptions.
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
namespace llvm {
class MCSubtargetInfo;
class Target;
-class StringRef;
-extern Target TheAlphaTarget;
+extern Target TheHexagonTarget;
} // End llvm namespace
-// Defines symbolic names for Alpha registers. This defines a mapping from
+// Defines symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
//
#define GET_REGINFO_ENUM
-#include "AlphaGenRegisterInfo.inc"
+#include "HexagonGenRegisterInfo.inc"
-// Defines symbolic names for the Alpha instructions.
+// Defines symbolic names for the Hexagon instructions.
//
#define GET_INSTRINFO_ENUM
-#include "AlphaGenInstrInfo.inc"
+#include "HexagonGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
-#include "AlphaGenSubtargetInfo.inc"
+#include "HexagonGenSubtargetInfo.inc"
#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..73c7e016f939
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
index 08f1a9d51fb5..885be2ddbd88 100644
--- a/lib/Target/SystemZ/MCTargetDesc/Makefile
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDesc
+LIBRARYNAME = LLVMHexagonDesc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 000000000000..dc387c549a1d
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMHexagonCodeGen
+TARGET = Hexagon
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = HexagonGenRegisterInfo.inc \
+ HexagonGenInstrInfo.inc \
+ HexagonGenAsmWriter.inc \
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..5b04a30d26c2
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonInfo
+ HexagonTargetInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonInfo HexagonCommonTableGen)
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index da99282ecb04..7aa5dd3b8980 100644
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZ.h"
+#include "Hexagon.h"
#include "llvm/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheSystemZTarget;
+Target llvm::TheHexagonTarget;
-extern "C" void LLVMInitializeSystemZTargetInfo() {
- RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget,
+                                                       "hexagon", "Hexagon");
}
diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..7b87be3e05a8
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonInfo
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile
index de01d7f8e8ef..494cca112249 100644
--- a/lib/Target/Alpha/TargetInfo/Makefile
+++ b/lib/Target/Hexagon/TargetInfo/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/TargetInfo/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaInfo
+LIBRARYNAME = LLVMHexagonInfo
# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
new file mode 100644
index 000000000000..8ec5673470fc
--- /dev/null
+++ b/lib/Target/LLVMBuild.txt
@@ -0,0 +1,56 @@
+;===- ./lib/Target/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the best execution engine (the native JIT, if available, or the
+; interpreter).
+[component_0]
+type = LibraryGroup
+name = Engine
+parent = Libraries
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the configured native target, if any.
+[component_1]
+type = LibraryGroup
+name = Native
+parent = Libraries
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the configured native code generator, if any.
+[component_2]
+type = LibraryGroup
+name = NativeCodeGen
+parent = Libraries
+
+; The component for the actual target library itself.
+[component_3]
+type = Library
+name = Target
+parent = Libraries
+required_libraries = Core MC Support
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with every built target, which makes it easy for tools to include every
+; target.
+[component_4]
+type = LibraryGroup
+name = all-targets
+parent = Libraries
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index ec8f52a92cb1..813767ba6d65 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmParser
- LLVMMBlazeInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..b10189a9dd97
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeAsmParser
+parent = MBlaze
+required_libraries = MBlazeInfo MC MCParser Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 2d357bb9674d..59a1ed97d3d4 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -9,10 +9,6 @@
#include "MCTargetDesc/MBlazeBaseInfo.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -100,11 +96,7 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
return AsmToken(lexedToken);
case AsmToken::Identifier:
{
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
-
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lexedToken.getString().lower());
if (regID) {
return AsmToken(AsmToken::Register,
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 97d311c15107..38fb0e87fdb4 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -18,9 +18,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -347,7 +345,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
MBlazeOperand *MBlazeAsmParser::
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 47b0db2cb2d0..bf1deef491c9 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -1,15 +1,15 @@
set(LLVM_TARGET_DEFINITIONS MBlaze.td)
-llvm_tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-llvm_tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM MBlazeGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MBlazeGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MBlazeGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MBlazeGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
@@ -18,6 +18,7 @@ add_llvm_target(MBlazeCodeGen
MBlazeISelDAGToDAG.cpp
MBlazeISelLowering.cpp
MBlazeFrameLowering.cpp
+ MBlazeMachineFunction.cpp
MBlazeRegisterInfo.cpp
MBlazeSubtarget.cpp
MBlazeTargetMachine.cpp
@@ -29,19 +30,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMBlazeAsmPrinter
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index 112c64c02638..be2dce156d56 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,12 +13,4 @@ set_property(
)
endif()
-add_llvm_library_dependencies(LLVMMBlazeDisassembler
- LLVMMBlazeCodeGen
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..28dd9dc98da6
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/Disassembler/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeDisassembler
+parent = MBlaze
+required_libraries = MBlazeDesc MBlazeInfo MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index fd761f1ca8c1..6b958c85eebf 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//===-- MBlazeDisassembler.cpp - Disassembler for MicroBlaze -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "MBlazeInstrInfo.h"
#include "MBlazeDisassembler.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
@@ -30,14 +29,14 @@
#include "MBlazeGenEDInfo.inc"
namespace llvm {
-extern MCInstrDesc MBlazeInsts[];
+extern const MCInstrDesc MBlazeInsts[];
}
using namespace llvm;
-const unsigned UNSUPPORTED = -1;
+const uint16_t UNSUPPORTED = -1;
-static unsigned mblazeBinary2Opcode[] = {
+static const uint16_t mblazeBinary2Opcode[] = {
MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
@@ -124,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) {
case 0x41: return MBlaze::SRL;
case 0x21: return MBlaze::SRC;
case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
}
}
@@ -177,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) {
}
static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
switch ((insn>>16)&0x1F) {
default: return UNSUPPORTED;
case 0x00: return MBlaze::BRI;
@@ -485,7 +492,7 @@ static unsigned getOPCODE(uint32_t insn) {
}
}
-EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+const EDInstInfo *MBlazeDisassembler::getEDInfo() const {
return instInfoMBlaze;
}
@@ -532,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
default:
return Fail;
+ case MBlazeII::FC:
+ break;
+
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return Fail;
@@ -548,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
instr.addOperand(MCOperand::CreateReg(RB));
break;
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
case MBlazeII::FRI:
switch (opcode) {
default:
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index 0ac0d89efbe7..5c4ae3b1ace8 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -1,4 +1,4 @@
-//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//===-- MBlazeDisassembler.h - Disassembler for MicroBlaze -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,8 +17,6 @@
#include "llvm/MC/MCDisassembler.h"
-struct InternalInstruction;
-
namespace llvm {
class MCInst;
@@ -48,7 +46,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index aff0b3d992d4..586e2d3eefc3 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..3a21a0560aef
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeAsmPrinter
+parent = MBlaze
+required_libraries = MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 570ab08a07aa..51ba7c359a1b 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//= MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -21,15 +21,15 @@ namespace llvm {
class MBlazeInstPrinter : public MCInstPrinter {
public:
- MBlazeInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ MBlazeInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = 0);
diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt
new file mode 100644
index 000000000000..0b290076a4e9
--- /dev/null
+++ b/lib/Target/MBlaze/LLVMBuild.txt
@@ -0,0 +1,34 @@
+;===- ./lib/Target/MBlaze/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = MBlaze
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+
+[component_1]
+type = Library
+name = MBlazeCodeGen
+parent = MBlaze
+required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 1245658d29ba..b4edff0709e6 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
+//===-- MBlaze.td - Describe the MBlaze Target Machine -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 97bd083fdd15..55fffe3ebfa7 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -38,8 +38,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -119,7 +117,7 @@ namespace {
static void printHex32(unsigned int Value, raw_ostream &O) {
O << "0x";
for (int i = 7; i >= 0; i--)
- O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
}
// Create a bitmask with all callee saved registers for CPU or Floating Point
@@ -311,9 +309,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
// Force static initialization.
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index c07570a487b9..19e787d8622d 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -29,13 +29,11 @@ using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
-namespace llvm {
-cl::opt<bool> DisableDelaySlotFiller(
+static cl::opt<bool> MBDisableDelaySlotFiller(
"disable-mblaze-delay-filler",
cl::init(false),
cl::desc("Disable the MBlaze delay slot filter."),
cl::Hidden);
-}
namespace {
struct Filler : public MachineFunctionPass {
@@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// 4. b0 is one or more of {a1, a2}
// 5. a accesses memory, and the middle bit
// contains a store operation.
- bool a_is_memory = desc.mayLoad() || desc.mayStore();
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
// Determine the number of operands in the slot instruction and in the
// candidate instruction.
@@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
}
// Check hazard type 5
- if (a_is_memory && m->getDesc().mayStore())
+ if (a_is_memory && m->mayStore())
return true;
}
@@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- MCInstrDesc brdesc = (--candidate)->getDesc();
- return (brdesc.hasDelaySlot());
+ --candidate;
+ return (candidate->hasDelaySlot());
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
@@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- MCInstrDesc desc = I->getDesc();
- if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
- desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
hasUnknownSideEffects(I))
break;
@@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- if (!DisableDelaySlotFiller)
+ if (!MBDisableDelaySlotFiller)
D = findDelayInstr(MBB,I);
++FilledSlots;
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
index 3f26ed15b284..e3c7236d1141 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -41,7 +41,6 @@ unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
default:
llvm_unreachable("unknown mblaze machine relocation type");
}
- return 0;
}
long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
@@ -54,7 +53,6 @@ long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
default:
llvm_unreachable("unknown mblaze relocation type");
}
- return 0;
}
unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
@@ -102,10 +100,8 @@ unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL)
- return SymOffset - (RelOffset + 4);
- else
- assert("computeRelocation unknown for this relocation type");
-
- return 0;
+ assert((RelTy == ELF::R_MICROBLAZE_32_PCREL ||
+ RelTy == ELF::R_MICROBLAZE_64_PCREL) &&
+ "computeRelocation unknown for this relocation type");
+ return SymOffset - (RelOffset + 4);
}
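The old guard here, RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL, was a tautology (the second operand is just a non-zero constant), and the assert in its else branch, taking only a string literal, could never fire; the rewrite folds the intent into a real assert and always returns the PC-relative value. As a worked example, a symbol at offset 0x100 referenced by a relocation at offset 0x40 yields 0x100 - (0x40 + 4) = 0xBC.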
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
index 63bfc0da745a..a314eb76ea46 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.h
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetELFWriterInfo.h"
namespace llvm {
+ class TargetMachine;
class MBlazeELFWriterInfo : public TargetELFWriterInfo {
public:
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index f28d5a77d49c..d2f14a5c53b7 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ---------------====//
//
// The LLVM Compiler Infrastructure
//
@@ -32,13 +32,11 @@
using namespace llvm;
-namespace llvm {
- cl::opt<bool> DisableStackAdjust(
- "disable-mblaze-stack-adjust",
- cl::init(false),
- cl::desc("Disable MBlaze stack layout adjustment."),
- cl::Hidden);
-}
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
static void replaceFrameIndexes(MachineFunction &MF,
SmallVector<std::pair<int,int64_t>, 16> &FR) {
@@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF,
//===----------------------------------------------------------------------===//
static void analyzeFrameIndexes(MachineFunction &MF) {
- if (DisableStackAdjust) return;
+ if (MBDisableStackAdjust) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
@@ -213,13 +211,13 @@ static void analyzeFrameIndexes(MachineFunction &MF) {
static void interruptFrameLayout(MachineFunction &MF) {
const Function *F = MF.getFunction();
- llvm::CallingConv::ID CallConv = F->getCallingConv();
+ CallingConv::ID CallConv = F->getCallingConv();
// If this function is not using either the interrupt_handler
// calling convention or the save_volatiles calling convention
// then we don't need to do any additional frame layout.
- if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
- CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ if (CallConv != CallingConv::MBLAZE_INTR &&
+ CallConv != CallingConv::MBLAZE_SVOL)
return;
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -230,7 +228,7 @@ static void interruptFrameLayout(MachineFunction &MF) {
// Determine if the calling convention is the interrupt_handler
// calling convention. Some pieces of the prologue and epilogue
// only need to be emitted if we are lowering an interrupt handler.
- bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ bool isIntr = CallConv == CallingConv::MBLAZE_INTR;
// Determine where to put prologue and epilogue additions
MachineBasicBlock &MENT = MF.front();
@@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
// if frame pointer elimination is disabled.
bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
}
void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -348,8 +347,8 @@ void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
// Determine the correct frame layout
determineFrameLayout(MF);
@@ -394,8 +393,8 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
// Get the FI's where RA and FP are saved.
int FPOffset = MBlazeFI->getFPStackOffset();
@@ -432,8 +431,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
if (MFI->adjustsStack() || requiresRA) {
MBlazeFI->setRAStackOffset(0);
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 8be15bfb857d..01e6578a352f 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -15,11 +15,10 @@
#define MBLAZE_FRAMEINFO_H
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MBlazeSubtarget;
+class MBlazeSubtarget;
class MBlazeFrameLowering : public TargetFrameLowering {
protected:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 8ec548f1437d..edfc3355691f 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -214,7 +216,7 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB)
const {
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default: llvm_unreachable("Unexpected instr type to insert");
case MBlaze::ShiftRL:
case MBlaze::ShiftRA:
@@ -600,7 +602,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue MBlazeTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for MicroBlaze.");
- return SDValue(); // Not reached
}
SDValue MBlazeTargetLowering::
@@ -656,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
MBlaze::R5, MBlaze::R6, MBlaze::R7,
MBlaze::R8, MBlaze::R9, MBlaze::R10
};
@@ -681,7 +682,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
/// TODO: isVarArg, isTailCall.
SDValue MBlazeTargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool &isTailCall,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -895,7 +896,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
if (VA.isRegLoc()) {
MVT RegVT = VA.getLocVT();
ArgRegEnd = VA.getLocReg();
- TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = MBlaze::GPRRegisterClass;
@@ -951,7 +952,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -963,7 +964,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
StackPtr = DAG.getRegister(StackReg, getPointerTy());
// The last register argument that must be saved is MBlaze::R10
- TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+ const TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
@@ -1045,10 +1046,10 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// If this function is using the interrupt_handler calling convention
// then use "rtid r14, 0" otherwise use "rtsd r15, 8"
- unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
- : MBlazeISD::Ret;
- unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
- : MBlaze::R15;
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
SDValue DReg = DAG.getRegister(Reg, MVT::i32);
if (Flag.getNode())
@@ -1079,7 +1080,6 @@ getConstraintType(const std::string &Constraint) const
case 'y':
case 'f':
return C_RegisterClass;
- break;
}
}
return TargetLowering::getConstraintType(Constraint);
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 8b49bc3de0cc..6a79fc126702 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,11 +15,11 @@
#ifndef MBlazeISELLOWERING_H
#define MBlazeISELLOWERING_H
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
namespace llvm {
namespace MBlazeCC {
@@ -134,7 +134,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index 4acdcfdd772c..3f145938728c 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
+//===-- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index 3082a7e227f8..91b69de05102 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
+//===-- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 54f605f989a3..e40432a1b9a9 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
+//===-- MBlazeInstrFormats.td - MB Instruction defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI
def FRRRR : Format<18>; // RSUB, FRSUB
def FRI : Format<19>; // RSUB, FRSUB
def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
@@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
let Inst{11-16} = flags;
let Inst{17-31} = imm15;
}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 7ae05b367cba..db71434443bf 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//===-- MBlazeInstrInfo.cpp - MBlaze Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 7174405a49d9..5252147b48e6 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//===-- MBlazeInstrInfo.h - MBlaze Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,9 @@
#define MBLAZEINSTRUCTIONINFO_H
#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MBlazeRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 1d8c987a0856..02a21574f493 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//===-- MBlazeInstrInfo.td - MBlaze Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
let usesCustomInserter = 1 in {
def Select_CC : MBlazePseudo<(outs GPR:$dst),
(ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
@@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
(Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
(CMP GPR:$R, GPR:$L), 1)>;
@@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
(Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
(i32 GPR:$T), (i32 GPR:$F), SETEQ),
(Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
@@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>;
def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
(BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
@@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
// 16-bit load and store
-def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
// 8-bit load and store
-def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index ea81dd63d195..91aaf940e626 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===//
+//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
@@ -73,16 +74,13 @@ lookupGCCName(const char *Name) const {
}
bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
- // Overload Table
- const bool OTable[] = {
+ if (IntrID == 0)
+ return false;
+
+ unsigned id = IntrID - Intrinsic::num_intrinsics + 1;
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- if (IntrID == 0)
- return false;
- else
- return OTable[IntrID - Intrinsic::num_intrinsics];
}
/// This defines the "getAttributes(ID id)" method.
@@ -92,7 +90,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
static FunctionType *getType(LLVMContext &Context, unsigned id) {
Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
index 80760d87e00a..34f379230def 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//===-- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 278afbefc165..b5dc59547bbf 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -1,4 +1,4 @@
-//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//===-- IntrinsicsMBlaze.td - Defines MBlaze intrinsics ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index a7e400b1d1a4..6b9f42ec91a6 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
@@ -85,9 +85,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
MCSymbol *MBlazeMCInstLower::
GetBlockAddressSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default:
- assert(0 && "Unknown target flag on GV operand");
-
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -150,7 +148,7 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
break;
- case MachineOperand::MO_FPImmediate:
+ case MachineOperand::MO_FPImmediate: {
bool ignored;
APFloat FVal = MO.getFPImm()->getValueAPF();
FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
@@ -160,6 +158,9 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateImm(Val);
break;
}
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
OutMI.addOperand(MCOp);
}
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index 92196f220225..7b97744ea933 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.cpp b/lib/Target/MBlaze/MBlazeMachineFunction.cpp
new file mode 100644
index 000000000000..2217b5477d6b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.cpp
@@ -0,0 +1,14 @@
+//===-- MBlazeMachineFunctionInfo.cpp - Private data ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMachineFunction.h"
+
+using namespace llvm;
+
+void MBlazeFunctionInfo::anchor() { }
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index df395094282f..95cc5077cc16 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -1,4 +1,4 @@
-//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//===-- MBlazeMachineFunctionInfo.h - Private data --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,8 +24,8 @@ namespace llvm {
/// MBlazeFunctionInfo - This class is derived from MachineFunction private
/// MBlaze target-specific information for each MachineFunction.
class MBlazeFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
-private:
/// Holds for each function where on the stack the Frame Pointer must be
/// saved. This is used on Prologue and Epilogue to emit FP save/restore
int FPStackOffset;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 9788ba9e6021..46f5207a90ba 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//===-- MBlazeRegisterInfo.cpp - MBlaze Register Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "mblaze-frame-info"
+#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
#include "MBlazeMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -54,10 +54,10 @@ unsigned MBlazeRegisterInfo::getPICCallReg() {
//===----------------------------------------------------------------------===//
/// MBlaze Callee Saved Registers
-const unsigned* MBlazeRegisterInfo::
+const uint16_t* MBlazeRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const {
// MBlaze callee-save register range is R20 - R31
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
@@ -184,10 +184,8 @@ unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 7e4b269cb887..1d5116293516 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//===-- MBlazeRegisterInfo.h - MBlaze Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -46,7 +46,7 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
static unsigned getPICCallReg();
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 13c46ba1ecba..64cae5cff85d 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//===-- MBlazeRegisterInfo.td - MBlaze Register defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeRelocations.h b/lib/Target/MBlaze/MBlazeRelocations.h
index c298eda2195f..6387ee23ec9b 100644
--- a/lib/Target/MBlaze/MBlazeRelocations.h
+++ b/lib/Target/MBlaze/MBlazeRelocations.h
@@ -1,4 +1,4 @@
-//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//===-- MBlazeRelocations.h - MBlaze Code Relocations -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4662f25ceb12..4a3ae5fc1470 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//===-- MBlazeSchedule.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td
index ccbf99dbd3a2..20257a60a0fa 100644
--- a/lib/Target/MBlaze/MBlazeSchedule3.td
+++ b/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//===-- MBlazeSchedule3.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td
index fa88766fdb18..ab53b424ded3 100644
--- a/lib/Target/MBlaze/MBlazeSchedule5.td
+++ b/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//===-- MBlazeSchedule5.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index 7e5667f55c15..d12d14245ea3 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//===-- MBlazeSubtarget.cpp - MBlaze Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index 43b0197ad5aa..eb375046f218 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//===-- MBlazeSubtarget.h - Define Subtarget for the MBlaze ----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7bff53ef8717..dd7de9bff36b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
@@ -33,30 +33,49 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+namespace {
+/// MBlaze Code Generator Pass Configuration Options.
+class MBlazePassConfig : public TargetPassConfig {
+public:
+ MBlazePassConfig(MBlazeTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MBlazeTargetMachine &getMBlazeTargetMachine() const {
+ return getTM<MBlazeTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MBlazePassConfig(this, PM);
}
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createMBlazeISelDag(*this));
+bool MBlazePassConfig::addInstSelector() {
+ PM.add(createMBlazeISelDag(getMBlazeTargetMachine()));
return false;
}
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
-bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createMBlazeDelaySlotFillerPass(*this));
+bool MBlazePassConfig::addPreEmitPass() {
+ PM.add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
return true;
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index c1bc08aeb505..1647a2169210 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -43,7 +43,9 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
@@ -77,8 +79,7 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
- virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // End llvm namespace
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 37871b6916c9..36134a69387c 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -3,13 +3,7 @@ add_llvm_library(LLVMMBlazeDesc
MBlazeMCAsmInfo.cpp
MBlazeMCCodeEmitter.cpp
MBlazeMCTargetDesc.cpp
- )
-
-add_llvm_library_dependencies(LLVMMBlazeDesc
- LLVMMBlazeAsmPrinter
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
+ MBlazeELFObjectWriter.cpp
)
add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..4982f0f17218
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeDesc
+parent = MBlaze
+required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f7d46a58f9..f383fecdc25a 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
static unsigned getFixupKindSize(unsigned Kind) {
switch (Kind) {
- default: assert(0 && "invalid fixup kind!");
+ default: llvm_unreachable("invalid fixup kind!");
case FK_Data_1: return 1;
case FK_PCRel_2:
case FK_Data_2: return 2;
@@ -39,12 +39,6 @@ static unsigned getFixupKindSize(unsigned Kind) {
namespace {
-class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- MBlazeELFObjectWriter(Triple::OSType OSType)
- : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
- /*HasRelocationAddend*/ true) {}
-};
class MBlazeAsmBackend : public MCAsmBackend {
public:
@@ -56,11 +50,16 @@ public:
return 2;
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
unsigned getPointerSize() const {
return 4;
@@ -76,7 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
-bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
return false;
@@ -87,12 +86,24 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExprOrImm;
}
-void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+void MBlazeAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res = Inst;
Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
}
-bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool MBlazeAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
if ((Count % 4) != 0)
return false;
@@ -106,20 +117,19 @@ bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
public:
- Triple::OSType OSType;
- ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
- : MBlazeAsmBackend(T), OSType(_OSType) { }
+ uint8_t OSABI;
+ ELFMBlazeAsmBackend(const Target &T, uint8_t _OSABI)
+ : MBlazeAsmBackend(T), OSABI(_OSABI) { }
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
- /*IsLittleEndian*/ false);
+ return createMBlazeELFObjectWriter(OS, OSABI);
}
};
-void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void ELFMBlazeAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned Size = getFixupKindSize(Fixup.getKind());
@@ -155,5 +165,6 @@ MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on MBlaze");
- return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFMBlazeAsmBackend(T, OSABI);
}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
index 776dbc4d8678..437026e7bbc0 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -51,6 +51,7 @@ namespace MBlazeII {
FRRRR,
FRI,
FC,
+ FRR,
FormMask = 63
//===------------------------------------------------------------------===//
@@ -95,7 +96,6 @@ static inline bool isSpecialMBlazeRegister(unsigned Reg) {
default:
return false;
}
- return false; // Not reached
}
/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
@@ -160,7 +160,6 @@ static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
case MBlaze::RPVR11 : return 0x200B;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
/// getRegisterFromNumbering - Given the enum value for some register, e.g.
@@ -201,7 +200,6 @@ static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
case 31 : return MBlaze::R31;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
@@ -232,7 +230,6 @@ static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
case 0x200B : return MBlaze::RPVR11;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
} // end namespace llvm;
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
new file mode 100644
index 000000000000..2824b3c35cf1
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
@@ -0,0 +1,77 @@
+//===-- MBlazeELFObjectWriter.cpp - MBlaze ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MBlazeELFObjectWriter(uint8_t OSABI);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ false) {}
+
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
+}
+
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend !=0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
+}
+
+
+
+MCObjectWriter *llvm::createMBlazeELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new MBlazeELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/ false);
+}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
index 0d88466bb300..8231f07dfa80 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -14,6 +14,8 @@
#include "MBlazeMCAsmInfo.h"
using namespace llvm;
+void MBlazeMCAsmInfo::anchor() { }
+
MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
IsLittleEndian = false;
StackGrowsUp = false;
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
index e68dd58b016b..977f9a6866d7 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//===-- MBlazeMCAsmInfo.h - MBlaze asm properties --------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,13 @@
#ifndef MBLAZETARGETASMINFO_H
#define MBLAZETARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
class MBlazeMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit MBlazeMCAsmInfo();
};
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index 1514557bf00b..c9b16368ecc5 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -43,7 +43,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+ uint64_t getBinaryCodeForInstr(const MCInst &MI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
@@ -54,8 +54,8 @@ public:
static unsigned GetMBlazeRegNum(const MCOperand &MO) {
// FIXME: getMBlazeRegisterNumbering() is sufficient?
- assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
- return 0;
+ llvm_unreachable("MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet "
+ "implemented.");
}
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
@@ -109,17 +109,14 @@ unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO) const {
if (MO.isReg())
return getMBlazeRegisterNumbering(MO.getReg());
- else if (MO.isImm())
+ if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- else if (MO.isExpr())
- return 0; // The relocation has already been recorded at this point.
- else {
+ if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
#ifndef NDEBUG
- errs() << MO;
+ errs() << MO;
#endif
- llvm_unreachable(0);
- }
- return 0;
+ llvm_unreachable(0);
}
void MBlazeMCCodeEmitter::
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 43ae281519c2..9a7549b0e7cf 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -62,13 +62,14 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default)
RM = Reloc::Static;
if (CM == CodeModel::Default)
CM = CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -82,12 +83,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSDarwin()) {
llvm_unreachable("MBlaze does not support Darwin MACH-O format");
- return NULL;
}
if (TheTriple.isOSWindows()) {
llvm_unreachable("MBlaze does not support Windows COFF format");
- return NULL;
}
return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
@@ -96,9 +95,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(MAI);
+ return new MBlazeInstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
index deff5cb078f9..ae82c32a5f26 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -14,24 +14,28 @@
#ifndef MBLAZEMCTARGETDESC_H
#define MBLAZEMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
class MCContext;
class MCCodeEmitter;
class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class Target;
class StringRef;
-class formatted_raw_ostream;
+class raw_ostream;
extern Target TheMBlazeTarget;
MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-
+
MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT);
+MCObjectWriter *createMBlazeELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
} // End llvm namespace
// Defines symbolic names for MBlaze registers. This defines a mapping from
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 93fce58883ed..b554d9b15e45 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..ba7ee5d69188
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeInfo
+parent = MBlaze
+required_libraries = MC Support Target
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 0952b76aefab..a8f9b52746ad 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS MSP430.td)
-llvm_tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MSP430GenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MSP430GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MSP430GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MSP430GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MSP430GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MSP430GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MSP430GenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(MSP430CommonTableGen)
add_llvm_target(MSP430CodeGen
@@ -14,6 +14,7 @@ add_llvm_target(MSP430CodeGen
MSP430ISelLowering.cpp
MSP430InstrInfo.cpp
MSP430FrameLowering.cpp
+ MSP430MachineFunctionInfo.cpp
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
MSP430TargetMachine.cpp
@@ -22,19 +23,6 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
-add_llvm_library_dependencies(LLVMMSP430CodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Desc
- LLVMMSP430Info
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index ce39d9517efe..64ac994b7f47 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMSP430AsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..37b8c2537fb4
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430AsmPrinter
+parent = MSP430
+required_libraries = MC Support
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 5d6c6ad93dbe..0930c453e954 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -92,7 +92,6 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
switch (CC) {
default:
llvm_unreachable("Unsupported CC code");
- break;
case MSP430CC::COND_E:
O << "eq";
break;
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index a1984a8aec19..d32eb3a21a37 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -21,8 +21,9 @@ namespace llvm {
class MSP430InstPrinter : public MCInstPrinter {
public:
- MSP430InstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt
new file mode 100644
index 000000000000..51d9702ac560
--- /dev/null
+++ b/lib/Target/MSP430/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/MSP430/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = MSP430
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = MSP430CodeGen
+parent = MSP430
+required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index 04bd03e49460..adc95c52014e 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Desc
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Info
- )
-
add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..3319d9363e16
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430Desc
+parent = MSP430
+required_libraries = MC MSP430AsmPrinter MSP430Info Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index ad7d380b5631..2e328cb5d6ac 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -12,8 +12,11 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
+void MSP430MCAsmInfo::anchor() { }
+
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
PointerSize = 2;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index f3138a22022d..e5c2fc283b17 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,15 @@
#ifndef MSP430TARGETASMINFO_H
#define MSP430TARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
- struct MSP430MCAsmInfo : public MCAsmInfo {
+ class MSP430MCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index fda70b81dc8c..c455f6bc24f2 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -51,18 +51,21 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new MSP430InstPrinter(MAI);
+ return new MSP430InstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 35f259076441..7f3505ca5514 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -15,9 +15,7 @@
#define MSP430MCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheMSP430Target;
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 5cc5e6e3d7c9..c6796b3789ad 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -1,4 +1,4 @@
-//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 883654943b64..1d1094bc339d 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -65,7 +65,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
- default: assert(0 && "Not implemented yet!");
+ default: llvm_unreachable("Not implemented yet!");
case MachineOperand::MO_Register:
O << MSP430InstPrinter::getRegisterName(MO.getReg());
return;
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index bd644435c76f..bdeb0c590f2d 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=//
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index c99f4ab6c2f9..61d7f2bf4766 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -1,4 +1,4 @@
-//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,7 +29,7 @@ using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
@@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
break;
--MBBI;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index dc374315171f..071a2f7de2c8 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -37,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
typedef enum {
@@ -80,7 +78,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
@@ -124,8 +121,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTLZ, MVT::i8, Expand);
setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
@@ -193,7 +194,6 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
- return SDValue();
}
}
@@ -259,19 +259,16 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
case CallingConv::Fast:
return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
- if (Ins.empty())
- return Chain;
- else {
+ if (Ins.empty())
+ return Chain;
report_fatal_error("ISRs cannot have arguments");
- return SDValue();
- }
}
}
SDValue
MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -289,7 +286,6 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Outs, OutVals, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
report_fatal_error("ISRs cannot be called directly");
- return SDValue();
}
}
@@ -372,7 +368,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -390,10 +386,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// ISRs cannot return any value.
- if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty())
report_fatal_error("ISRs cannot return any value");
- return SDValue();
- }
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -599,8 +593,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
// Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
switch (Opc) {
- default:
- assert(0 && "Invalid shift opcode!");
+ default: llvm_unreachable("Invalid shift opcode!");
case ISD::SHL:
return DAG.getNode(MSP430ISD::SHL, dl,
VT, N->getOperand(0), N->getOperand(1));
@@ -651,7 +644,7 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
}
SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
@@ -660,7 +653,7 @@ SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
}
static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
@@ -908,13 +901,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -930,7 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -1028,8 +1021,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
unsigned Opc;
const TargetRegisterClass * RC;
switch (MI->getOpcode()) {
- default:
- assert(0 && "Invalid shift opcode!");
+ default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
Opc = MSP430::SHL8r1;
RC = MSP430::GR8RegisterClass;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 237f60435736..e372f00bf324 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -1,4 +1,4 @@
-//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -152,8 +152,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
index 73aef1facc0f..a9e87dad0cd8 100644
--- a/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -1,4 +1,4 @@
-//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index ffd43183c5d9..c03ba470af27 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -43,8 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -72,8 +70,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -133,9 +130,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
switch (CC) {
- default:
- assert(0 && "Invalid branch condition!");
- break;
+ default: llvm_unreachable("Invalid branch condition!");
case MSP430CC::COND_E:
CC = MSP430CC::COND_NE;
break;
@@ -161,13 +156,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -192,7 +186,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled
// by this analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Cannot handle indirect branches.
@@ -301,8 +295,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (Desc.TSFlags & MSP430II::SizeMask) {
default:
switch (Desc.getOpcode()) {
- default:
- assert(0 && "Unknown instruction size!");
+ default: llvm_unreachable("Unknown instruction size!");
case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
@@ -318,8 +311,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
case MSP430II::SizeSpecial:
switch (MI->getOpcode()) {
- default:
- assert(0 && "Unknown instruction size!");
+ default: llvm_unreachable("Unknown instruction size!");
case MSP430::SAR8r1c:
case MSP430::SAR16r1c:
return 4;
@@ -331,6 +323,4 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case MSP430II::Size6Bytes:
return 6;
}
-
- return 6;
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 90013f5c2e70..04f339bdd608 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef LLVM_TARGET_MSP430INSTRINFO_H
#define LLVM_TARGET_MSP430INSTRINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 59cb59873ab7..4348dd5e54e6 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index d1d9a1158635..b1773fba7e92 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,7 +39,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetExternalSymbolSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -81,7 +81,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetBlockAddressSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -116,7 +116,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
switch (MO.getType()) {
default:
MI->dump();
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
@@ -143,6 +143,9 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index e937696406fe..24151e2b8ea1 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..0f7539908458
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void MSP430MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 383fd2e9821c..632d6dee275f 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -21,6 +21,8 @@ namespace llvm {
/// MSP430MachineFunctionInfo - This class is derived from MachineFunctionInfo
/// and contains private MSP430 target-specific information for each MachineFunction.
class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9049c4bf8f65..51ec71ace525 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "msp430-reg-info"
+#include "MSP430RegisterInfo.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,27 +38,27 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
-const unsigned*
+const uint16_t*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
const Function* F = MF->getFunction();
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const unsigned CalleeSavedRegsFP[] = {
+ static const uint16_t CalleeSavedRegsFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const unsigned CalleeSavedRegsIntr[] = {
+ static const uint16_t CalleeSavedRegsIntr[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
0
};
- static const unsigned CalleeSavedRegsIntrFP[] = {
+ static const uint16_t CalleeSavedRegsIntrFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 10a3d5320636..82ee4997392c 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,18 +36,11 @@ public:
MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
- const TargetRegisterClass *
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const {
- // No sub-classes makes this really easy.
- return A;
- }
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index d1c2e3f7915c..3f2eb8ccef10 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 3ee14d9f7a83..edeaf34676bd 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -1,4 +1,4 @@
-//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void MSP430Subtarget::anchor() { }
+
MSP430Subtarget::MSP430Subtarget(const std::string &TT,
const std::string &CPU,
const std::string &FS) :
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 1ce5f11fe1bb..4d8792eede7f 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -1,4 +1,4 @@
-//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,16 +15,16 @@
#define LLVM_TARGET_MSP430_SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "MSP430GenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ virtual void anchor();
bool ExtendedInsts;
public:
/// This constructor initializes the data members to match that
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 4dd893326e3f..9f2eda13d7fd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430TargetMachine.h"
+#include "MSP430.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -28,24 +28,43 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) { }
+namespace {
+/// MSP430 Code Generator Pass Configuration Options.
+class MSP430PassConfig : public TargetPassConfig {
+public:
+ MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MSP430TargetMachine &getMSP430TargetMachine() const {
+ return getTM<MSP430TargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MSP430PassConfig(this, PM);
+}
-bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430PassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createMSP430ISelDag(*this, OptLevel));
+ PM.add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
return false;
}
-bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
PM.add(createMSP430BranchSelectionPass());
return false;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index eb483dc8706f..f54146b3e338 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -1,4 +1,4 @@
-//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,8 +39,9 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
@@ -61,8 +62,7 @@ public:
return &TSInfo;
}
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
} // end namespace llvm
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1526946af5fd..f6b40eab31b6 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..deafc2d2f558
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430Info
+parent = MSP430
+required_libraries = MC Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 53ad155f376c..786a0c5ed187 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -22,12 +22,13 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
-static bool isAcceptableChar(char C, bool AllowPeriod) {
+static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
if ((C < 'a' || C > 'z') &&
(C < 'A' || C > 'Z') &&
(C < '0' || C > '9') &&
C != '_' && C != '$' && C != '@' &&
- !(AllowPeriod && C == '.'))
+ !(AllowPeriod && C == '.') &&
+ !(AllowUTF8 && (C & 0x80)))
return false;
return true;
}
@@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
// If any of the characters in the string is an unacceptable character, force
// quotes.
bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i], AllowPeriod))
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
return true;
return false;
}
@@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
}
bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (!isAcceptableChar(Str[i], AllowPeriod))
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
MangleLetter(OutName, Str[i]);
else
OutName.push_back(Str[i]);
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..ac21c259fb44
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmParser
+ MipsAsmParser.cpp
+ )
+
diff --git a/lib/Target/Mips/AsmParser/LLVMBuild.txt b/lib/Target/Mips/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..e7ca243d0e7f
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/AsmParser/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsAsmParser
+parent = Mips
+required_libraries = MC MCParser Support MipsDesc MipsInfo
+add_to_library_groups = Mips
diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Mips/AsmParser/Makefile
index 6b26101f4473..679acee9fe72 100644
--- a/lib/Target/Blackfin/MCTargetDesc/Makefile
+++ b/lib/Target/Mips/AsmParser/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===##
+##===- lib/Target/Mips/AsmParser/Makefile ------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -6,11 +6,10 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-
LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinDesc
+LIBRARYNAME = LLVMMipsAsmParser
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' mips target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
new file mode 100644
index 000000000000..58b559025757
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -0,0 +1,66 @@
+//===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+class MipsAsmParser : public MCTargetAsmParser {
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+
+public:
+ MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser() {
+ }
+
+};
+}
+
+bool MipsAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+extern "C" void LLVMInitializeMipsAsmParser() {
+ RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
+ RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
+ RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target);
+ RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget);
+}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 71391f322725..13d17e4e5293 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -1,15 +1,17 @@
set(LLVM_TARGET_DEFINITIONS Mips.td)
-llvm_tablegen(MipsGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MipsGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MipsGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MipsGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MipsGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
+ MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
@@ -21,7 +23,7 @@ add_llvm_target(MipsCodeGen
MipsISelLowering.cpp
MipsFrameLowering.cpp
MipsMCInstLower.cpp
- MipsMCSymbolRefExpr.cpp
+ MipsMachineFunction.cpp
MipsRegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
@@ -29,19 +31,7 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsDesc
- LLVMMipsInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
+add_subdirectory(AsmParser)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c45b35df8c11..3e9fbf1c5566 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMipsAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..317057b913b1
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/InstPrinter/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsAsmPrinter
+parent = Mips
+required_libraries = MC Support
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile
index 74872a48b974..f07f3ed381ee 100644
--- a/lib/Target/Mips/InstPrinter/Makefile
+++ b/lib/Target/Mips/InstPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 3dafc6134488..6886b1745240 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -13,14 +13,15 @@
#define DEBUG_TYPE "asm-printer"
#include "MipsInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "MipsGenAsmWriter.inc"
const char* Mips::MipsFCCToString(Mips::CondCode CC) {
@@ -61,12 +62,8 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) {
llvm_unreachable("Impossible condition code!");
}
-StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << '$' << LowercaseString(getRegisterName(RegNo));
+ OS << '$' << StringRef(getRegisterName(RegNo)).lower();
}
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -75,6 +72,59 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printAnnotation(O, Annot);
}
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ }
+ else if (!(SRE = dyn_cast<MCSymbolRefExpr>(Expr)))
+ assert(false && "Unexpected MCExpr type.");
+
+ MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case MCSymbolRefExpr::VK_None: break;
+ case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI: OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO: OS << "%dtprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
+ }
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if ((Kind == MCSymbolRefExpr::VK_Mips_GPOFF_HI) ||
+ (Kind == MCSymbolRefExpr::VK_Mips_GPOFF_LO))
+ OS << ")))";
+ else if (Kind != MCSymbolRefExpr::VK_None)
+ OS << ')';
+}
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
@@ -82,14 +132,14 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Op.getReg());
return;
}
-
+
if (Op.isImm()) {
O << Op.getImm();
return;
}
-
+
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ printExpr(Op.getExpr(), O);
}
void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 5c1116538c61..76b839b2127f 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
+//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
namespace llvm {
// These enumeration declarations were originally in MipsInstrInfo.h but
// had to be moved here to avoid circular dependencies between
-// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
namespace Mips {
// Mips Branch Codes
enum FPBranchCode {
@@ -77,17 +77,17 @@ class TargetMachine;
class MipsInstPrinter : public MCInstPrinter {
public:
- MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
+ MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
- static const char *getInstructionName(unsigned Opcode);
static const char *getRegisterName(unsigned RegNo);
-
- virtual StringRef getOpcodeName(unsigned Opcode) const;
+
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
-
+
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
new file mode 100644
index 000000000000..abbed8c90fc8
--- /dev/null
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -0,0 +1,34 @@
+;===- ./lib/Target/Mips/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Mips
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = MipsCodeGen
+parent = Mips
+required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 2ceb5c95746b..fa231507a2ef 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -3,13 +3,7 @@ add_llvm_library(LLVMMipsDesc
MipsMCAsmInfo.cpp
MipsMCCodeEmitter.cpp
MipsMCTargetDesc.cpp
- )
-
-add_llvm_library_dependencies(LLVMMipsDesc
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsInfo
- LLVMSupport
+ MipsELFObjectWriter.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..29f5da691180
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/MCTargetDesc/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsDesc
+parent = Mips
+required_libraries = MC MipsAsmPrinter MipsInfo Support
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index f190ec42c776..e79be3363623 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -1,44 +1,172 @@
+//===-- MipsAsmBackend.cpp - Mips Asm Backend ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsAsmBackend and MipsELFObjectWriter classes.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "MipsBaseInfo.h"
+#include "MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Object/MachOFormat.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
-namespace {
-class MipsELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend)
- : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
- HasRelocationAddend) {}
-};
+// Prepare the fixup value for encoding into the target instruction.
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+
+ // Add/subtract and shift
+ switch (Kind) {
+ default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case Mips::fixup_Mips_LO16:
+ break;
+ case Mips::fixup_Mips_PC16:
+ // So far we are only using this type for branches.
+ // For branches we start 1 instruction after the branch
+ // so the displacement will be one instruction size less.
+ Value -= 4;
+ // The displacement is then divided by 4 to give us an 18 bit
+ // address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_26:
+ // So far we are only using this type for jumps.
+    // The displacement is then divided by 4 to give us a 28 bit
+ // address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_HI16:
+ case Mips::fixup_Mips_GOT_Local:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value = ((Value + 0x8000) >> 16) & 0xffff;
+ break;
+ }
+
+ return Value;
+}
+namespace {
class MipsAsmBackend : public MCAsmBackend {
+ Triple::OSType OSType;
+ bool IsLittle; // Big or little endian
+ bool Is64Bit; // 32 or 64 bit words
+
public:
- MipsAsmBackend(const Target &T)
- : MCAsmBackend() {}
+ MipsAsmBackend(const Target &T, Triple::OSType _OSType,
+ bool _isLittle, bool _is64Bit)
+ :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
- unsigned getNumFixupKinds() const {
- return 1; //tbd
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit);
}
/// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
+ int64_t SymOffset = MipsGetSymAndOffset(Fixup).second;
+
+ if (!Value && !SymOffset)
+ return; // Doesn't change encoding.
+
+ // Where do we start in the object
+ unsigned Offset = Fixup.getOffset();
+ // Number of bytes we need to fixup
+ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+ // Used to point to big endian bytes
+ unsigned FullSize;
+
+ switch ((unsigned)Kind) {
+ case Mips::fixup_Mips_16:
+ FullSize = 2;
+ break;
+ case Mips::fixup_Mips_64:
+ FullSize = 8;
+ break;
+ default:
+ FullSize = 4;
+ break;
+ }
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ }
+
+ uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+ CurVal |= (Value + SymOffset) & Mask;
+
+ // Write out the fixed up bytes back to the code/data bits.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
+ }
+ }
+
+ unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
+      // This table *must* be in the same order as the fixup_* kinds in
+ // MipsFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_Mips_16", 0, 16, 0 },
+ { "fixup_Mips_32", 0, 32, 0 },
+ { "fixup_Mips_REL32", 0, 32, 0 },
+ { "fixup_Mips_26", 0, 26, 0 },
+ { "fixup_Mips_HI16", 0, 16, 0 },
+ { "fixup_Mips_LO16", 0, 16, 0 },
+ { "fixup_Mips_GPREL16", 0, 16, 0 },
+ { "fixup_Mips_LITERAL", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
+ { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_CALL16", 0, 16, 0 },
+ { "fixup_Mips_GPREL32", 0, 32, 0 },
+ { "fixup_Mips_SHIFT5", 6, 5, 0 },
+ { "fixup_Mips_SHIFT6", 6, 5, 0 },
+ { "fixup_Mips_64", 0, 64, 0 },
+ { "fixup_Mips_TLSGD", 0, 16, 0 },
+ { "fixup_Mips_GOTTPREL", 0, 16, 0 },
+ { "fixup_Mips_TPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_TPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_TLSLDM", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
}
/// @name Target Relaxation Interfaces
@@ -48,70 +176,62 @@ public:
/// relaxation.
///
/// \param Inst - The instruction to test.
- bool MayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
- /// RelaxInstruction - Relax the instruction in the given fragment to the next
- /// wider instruction.
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
+ /// RelaxInstruction - Relax the instruction in the given fragment
+ /// to the next wider instruction.
///
- /// \param Inst - The instruction to relax, which may be the same as the
- /// output.
+ /// \param Inst - The instruction to relax, which may be the same
+ /// as the output.
/// \param Res [output] - On return, the relaxed instruction.
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
}
-
+
/// @}
- /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
- /// output. If the target cannot generate such a sequence, it should return an
- /// error.
+ /// WriteNopData - Write an (optimal) nop sequence of Count bytes
+ /// to the given output. If the target cannot generate such a sequence,
+ /// it should return an error.
///
/// \return - True on success.
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
- return false;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
}
-};
+}; // class MipsAsmBackend
-class MipsEB_AsmBackend : public MipsAsmBackend {
-public:
- Triple::OSType OSType;
-
- MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
- : MipsAsmBackend(T), OSType(_OSType) {}
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ false);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
- }
-};
-
-class MipsEL_AsmBackend : public MipsAsmBackend {
-public:
- Triple::OSType OSType;
-
- MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
- : MipsAsmBackend(T), OSType(_OSType) {}
+} // namespace
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
+// MCAsmBackend
+MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/false);
+}
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
- }
-};
+MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
+MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/true);
+}
- // just return little endian for now
- //
- return new MipsEL_AsmBackend(T, Triple(TT).getOS());
+MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/true);
}
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index f7a6fa949091..fb1c5ce6b6c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===//
+//===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,103 @@
#ifndef MIPSBASEINFO_H
#define MIPSBASEINFO_H
+#include "MipsFixupKinds.h"
#include "MipsMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT16 - Represents the offset into the global offset table at which
+  /// the address of the relocation entry symbol resides during execution.
+ MO_GOT16,
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+  // the module ID and TLS block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+  // the module ID and TLS block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+    // or one that has not been implemented yet. It is illegal to code-generate
+    // it, but it is tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
+ };
+}
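As a rough illustration of how these flags are consumed (a minimal sketch; the helper name is invented here, but the mask logic mirrors the check EncodeInstruction performs later in this patch):

  // Sketch only: assumes an instruction's TSFlags carries the MipsII format
  // bits in its low four bits, as FormMask = 15 implies.
  static inline bool isMipsPseudo(uint64_t TSFlags) {
    return (TSFlags & MipsII::FormMask) == MipsII::Pseudo;
  }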
+
+
/// getMipsRegisterNumbering - Given the enum value for some register,
/// return the number that it corresponds to.
inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
@@ -98,15 +190,43 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
+ case Mips::D15:
return 30;
case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
return 31;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
+}
+
+inline static std::pair<const MCSymbolRefExpr*, int64_t>
+MipsGetSymAndOffset(const MCFixup &Fixup) {
+ MCFixupKind FixupKind = Fixup.getKind();
+
+ if ((FixupKind < FirstTargetFixupKind) ||
+ (FixupKind >= MCFixupKind(Mips::LastTargetFixupKind)))
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ const MCExpr *Expr = Fixup.getValue();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr);
+ const MCExpr *LHS = BE->getLHS();
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+
+ if ((LHS->getKind() != MCExpr::SymbolRef) || !CE)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(LHS), CE->getValue());
+ }
+
+ if (Kind != MCExpr::SymbolRef)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(Expr), 0);
}
}
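A quick sketch of what MipsGetSymAndOffset yields, as used by the new MipsELFObjectWriter::sortRelocs below (the example expressions are invented; the behaviour is read off the code above):

  // Fixup on a Mips target fixup kind:
  //   value "foo"      -> (MCSymbolRefExpr for foo, 0)
  //   value "foo + 8"  -> (MCSymbolRefExpr for foo, 8)
  // Any non-target fixup kind -> (0, 0)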
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
new file mode 100644
index 000000000000..2091bec50082
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -0,0 +1,249 @@
+//===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <list>
+
+using namespace llvm;
+
+namespace {
+ struct RelEntry {
+ RelEntry(const ELFRelocationEntry &R, const MCSymbol *S, int64_t O) :
+ Reloc(R), Sym(S), Offset(O) {}
+ ELFRelocationEntry Reloc;
+ const MCSymbol *Sym;
+ int64_t Offset;
+ };
+
+ typedef std::list<RelEntry> RelLs;
+ typedef RelLs::iterator RelLsIter;
+
+ class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI);
+
+ virtual ~MipsELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+}
+
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ false) {}
+
+MipsELFObjectWriter::~MipsELFObjectWriter() {}
+
+// FIXME: get the real EABI Version from the Subtarget class.
+unsigned MipsELFObjectWriter::getEFlags() const {
+
+ // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
+ unsigned Flag = ELF::EF_MIPS_NOREORDER;
+
+ if (is64Bit())
+ Flag |= ELF::EF_MIPS_ARCH_64R2;
+ else
+ Flag |= ELF::EF_MIPS_ARCH_32R2;
+ return Flag;
+}
+
+const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0.");
+ const MCSymbol &Sym = Target.getSymA()->getSymbol().AliasedSymbol();
+
+ if (Sym.getSection().getKind().isMergeableCString() ||
+ Sym.getSection().getKind().isMergeableConst())
+ return &Sym;
+
+ return NULL;
+}
+
+unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
+ unsigned Kind = (unsigned)Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ELF::R_MIPS_32;
+ break;
+ case FK_GPRel_4:
+ Type = ELF::R_MIPS_GPREL32;
+ break;
+ case Mips::fixup_Mips_GPREL16:
+ Type = ELF::R_MIPS_GPREL16;
+ break;
+ case Mips::fixup_Mips_26:
+ Type = ELF::R_MIPS_26;
+ break;
+ case Mips::fixup_Mips_CALL16:
+ Type = ELF::R_MIPS_CALL16;
+ break;
+ case Mips::fixup_Mips_GOT_Global:
+ case Mips::fixup_Mips_GOT_Local:
+ Type = ELF::R_MIPS_GOT16;
+ break;
+ case Mips::fixup_Mips_HI16:
+ Type = ELF::R_MIPS_HI16;
+ break;
+ case Mips::fixup_Mips_LO16:
+ Type = ELF::R_MIPS_LO16;
+ break;
+ case Mips::fixup_Mips_TLSGD:
+ Type = ELF::R_MIPS_TLS_GD;
+ break;
+ case Mips::fixup_Mips_GOTTPREL:
+ Type = ELF::R_MIPS_TLS_GOTTPREL;
+ break;
+ case Mips::fixup_Mips_TPREL_HI:
+ Type = ELF::R_MIPS_TLS_TPREL_HI16;
+ break;
+ case Mips::fixup_Mips_TPREL_LO:
+ Type = ELF::R_MIPS_TLS_TPREL_LO16;
+ break;
+ case Mips::fixup_Mips_TLSLDM:
+ Type = ELF::R_MIPS_TLS_LDM;
+ break;
+ case Mips::fixup_Mips_DTPREL_HI:
+ Type = ELF::R_MIPS_TLS_DTPREL_HI16;
+ break;
+ case Mips::fixup_Mips_DTPREL_LO:
+ Type = ELF::R_MIPS_TLS_DTPREL_LO16;
+ break;
+ case Mips::fixup_Mips_Branch_PCRel:
+ case Mips::fixup_Mips_PC16:
+ Type = ELF::R_MIPS_PC16;
+ break;
+ }
+
+ return Type;
+}
+
+// Return true if R is either a GOT16 against a local symbol or HI16.
+static bool NeedsMatchingLo(const MCAssembler &Asm, const RelEntry &R) {
+ if (!R.Sym)
+ return false;
+
+ MCSymbolData &SD = Asm.getSymbolData(R.Sym->AliasedSymbol());
+
+ return ((R.Reloc.Type == ELF::R_MIPS_GOT16) && !SD.isExternal()) ||
+ (R.Reloc.Type == ELF::R_MIPS_HI16);
+}
+
+static bool HasMatchingLo(const MCAssembler &Asm, RelLsIter I, RelLsIter Last) {
+ if (I == Last)
+ return false;
+
+ RelLsIter Hi = I++;
+
+ return (I->Reloc.Type == ELF::R_MIPS_LO16) && (Hi->Sym == I->Sym) &&
+ (Hi->Offset == I->Offset);
+}
+
+static bool HasSameSymbol(const RelEntry &R0, const RelEntry &R1) {
+ return R0.Sym == R1.Sym;
+}
+
+static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
+ return (R0.Offset > R1.Offset) ? 1 : ((R0.Offset == R1.Offset) ? 0 : -1);
+}
+
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+  // Call the default function first. Relocations are sorted in descending
+ // order of r_offset.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ RelLs RelocLs;
+ std::vector<RelLsIter> Unmatched;
+
+ // Fill RelocLs. Traverse Relocs backwards so that relocations in RelocLs
+ // are in ascending order of r_offset.
+ for (std::vector<ELFRelocationEntry>::reverse_iterator R = Relocs.rbegin();
+ R != Relocs.rend(); ++R) {
+ std::pair<const MCSymbolRefExpr*, int64_t> P =
+ MipsGetSymAndOffset(*R->Fixup);
+ RelocLs.push_back(RelEntry(*R, P.first ? &P.first->getSymbol() : 0,
+ P.second));
+ }
+
+ // Get list of unmatched HI16 and GOT16.
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ if (NeedsMatchingLo(Asm, *R) && !HasMatchingLo(Asm, R, --RelocLs.end()))
+ Unmatched.push_back(R);
+
+ // Insert unmatched HI16 and GOT16 immediately before their matching LO16.
+ for (std::vector<RelLsIter>::iterator U = Unmatched.begin();
+ U != Unmatched.end(); ++U) {
+ RelLsIter LoPos = RelocLs.end(), HiPos = *U;
+ bool MatchedLo = false;
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) {
+ if ((R->Reloc.Type == ELF::R_MIPS_LO16) && HasSameSymbol(*HiPos, *R) &&
+ (CompareOffset(*R, *HiPos) >= 0) &&
+ ((LoPos == RelocLs.end()) || ((CompareOffset(*R, *LoPos) < 0)) ||
+ (!MatchedLo && !CompareOffset(*R, *LoPos))))
+ LoPos = R;
+
+ MatchedLo = NeedsMatchingLo(Asm, *R) &&
+ HasMatchingLo(Asm, R, --RelocLs.end());
+ }
+
+ // If a matching LoPos was found, move HiPos and insert it before LoPos.
+ // Make the offsets of HiPos and LoPos match.
+ if (LoPos != RelocLs.end()) {
+ HiPos->Offset = LoPos->Offset;
+ RelocLs.insert(LoPos, *HiPos);
+ RelocLs.erase(HiPos);
+ }
+ }
+
+ // Put the sorted list back in reverse order.
+ assert(Relocs.size() == RelocLs.size());
+ unsigned I = RelocLs.size();
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ Relocs[--I] = R->Reloc;
+}
+
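For concreteness, a hypothetical trace of the pass above (symbols invented; the offsets involved are the expression offsets recorded in RelEntry): given an ascending working list with an unmatched HI16 separated from its partner, the HI16 entry is reinserted directly in front of the matching LO16 and takes over its offset, so the two travel as an adjacent pair when the list is written back out.

  // before:  HI16(foo)   R_MIPS_32(bar)   LO16(foo)
  // after:   R_MIPS_32(bar)   HI16(foo)   LO16(foo)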
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit) {
+ MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 8b099eab91fd..9b76eda861dc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -1,7 +1,4 @@
-#ifndef LLVM_Mips_MipsFIXUPKINDS_H
-#define LLVM_Mips_MipsFIXUPKINDS_H
-
-//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===//
+//===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,81 +7,100 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H
+#define LLVM_MIPS_MIPSFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
namespace llvm {
namespace Mips {
- enum Fixups {
- // fixup_Mips_xxx - R_MIPS_NONE
- fixup_Mips_NONE = FirstTargetFixupKind,
+  // Although most of the current fixup types reflect a unique relocation,
+  // one can have multiple fixup types for a given relocation, and thus they
+  // need to be uniquely named.
+  //
+  // This table *must* be in the same order as
+  //       MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
+  // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
- // fixup_Mips_xxx - R_MIPS_16.
- fixup_Mips_16,
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
- // fixup_Mips_xxx - R_MIPS_32.
- fixup_Mips_32,
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
- // fixup_Mips_xxx - R_MIPS_REL32.
- fixup_Mips_REL32,
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
- // fixup_Mips_xxx - R_MIPS_26.
- fixup_Mips_26,
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
- // fixup_Mips_xxx - R_MIPS_HI16.
- fixup_Mips_HI16,
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
- // fixup_Mips_xxx - R_MIPS_LO16.
- fixup_Mips_LO16,
+    // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
- // fixup_Mips_xxx - R_MIPS_GPREL16.
- fixup_Mips_GPREL16,
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
- // fixup_Mips_xxx - R_MIPS_LITERAL.
- fixup_Mips_LITERAL,
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
- // fixup_Mips_xxx - R_MIPS_GOT16.
- fixup_Mips_GOT16,
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
- // fixup_Mips_xxx - R_MIPS_PC16.
- fixup_Mips_PC16,
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
- // fixup_Mips_xxx - R_MIPS_CALL16.
- fixup_Mips_CALL16,
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
- // fixup_Mips_xxx - R_MIPS_GPREL32.
- fixup_Mips_GPREL32,
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
- // fixup_Mips_xxx - R_MIPS_SHIFT5.
- fixup_Mips_SHIFT5,
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
- // fixup_Mips_xxx - R_MIPS_SHIFT6.
- fixup_Mips_SHIFT6,
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
- // fixup_Mips_xxx - R_MIPS_64.
- fixup_Mips_64,
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
- // fixup_Mips_xxx - R_MIPS_TLS_GD.
- fixup_Mips_TLSGD,
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
- // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
- fixup_Mips_GOTTPREL,
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
- fixup_Mips_TPREL_HI,
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
- fixup_Mips_TPREL_LO,
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
- // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
- fixup_Mips_Branch_PCRel,
+ // resulting in - R_MIPS_TLS_LDM.
+ fixup_Mips_TLSLDM,
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
-} // namespace llvm
+ // resulting in - R_MIPS_TLS_DTPREL_HI16.
+ fixup_Mips_DTPREL_HI,
+
+ // resulting in - R_MIPS_TLS_DTPREL_LO16.
+ fixup_Mips_DTPREL_LO,
+
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
} // namespace Mips
+} // namespace llvm
-#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
+#endif // LLVM_MIPS_MIPSFIXUPKINDS_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 71ae80498995..9d67aa1856e3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,8 @@
using namespace llvm;
+void MipsMCAsmInfo::anchor() { }
+
MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if ((TheTriple.getArch() == Triple::mips) ||
@@ -25,11 +27,12 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
- Data64bitsDirective = 0;
+ Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
+ GPRel64Directive = "\t.gpdword\t";
WeakRefDirective = "\t.weak\t";
SupportsDebugInformation = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 41b719207b7b..e1d878936f31 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,14 @@
#ifndef MIPSTARGETASMINFO_H
#define MIPSTARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
class MipsMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit MipsMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index d66de23ba115..27954b174ed9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,16 +12,18 @@
//===----------------------------------------------------------------------===//
//
#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
-#include "MCTargetDesc/MipsMCTargetDesc.h"
using namespace llvm;
@@ -31,22 +33,252 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+ bool IsLittleEndian;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : MCII(mcii), STI(sti) {}
+ MCContext &ctx, bool IsLittle) :
+ MCII(mcii), STI(sti) , Ctx(ctx), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
}
+
+ void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+    // Output the instruction encoding in the target's byte order.
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // getJumpTargetOpValue - Return binary encoding of the jump
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBranchTargetOpValue - Return binary encoding of the branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+ // operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
}; // class MipsMCCodeEmitter
} // namespace
-MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new MipsMCCodeEmitter(MCII, STI, Ctx);
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, STI, Ctx, false);
+}
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, STI, Ctx, true);
+}
+
+/// EncodeInstruction - Emit the instruction.
+/// The instruction size is currently always 4 bytes.
+void MipsMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+{
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ // Check for unimplemented opcodes.
+ // Unfortunately in MIPS both NOT and SLL will come in with Binary == 0
+ // so we have to special check for them.
+ unsigned Opcode = MI.getOpcode();
+ if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
+ llvm_unreachable("unimplemented opcode in EncodeInstruction()");
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded and shouldn't be here
+ // in the first place!
+ if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ llvm_unreachable("Pseudo opcode found in EncodeInstruction()");
+
+ // For now all instructions are 4 bytes
+ int Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+
+ EmitInstruction(Binary, Size, OS);
}
+
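As a quick illustration of the byte order produced by EmitInstruction above (the sample value is invented, not a real Mips encoding):

  // Val = 0x12345678, Size = 4
  //   IsLittleEndian == true   -> emits 0x78 0x56 0x34 0x12
  //   IsLittleEndian == false  -> emits 0x12 0x34 0x56 0x78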
+/// getBranchTargetOpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_PC16)));
+ return 0;
+}
+
+/// getJumpTargetOpValue - Return binary encoding of the jump
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_26)));
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getMipsRegisterNumbering(Reg);
+ return RegNo;
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ // MO must be an Expr.
+ assert(MO.isExpr());
+
+ const MCExpr *Expr = MO.getExpr();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
+ Kind = Expr->getKind();
+ }
+
+ assert (Kind == MCExpr::SymbolRef);
+
+ Mips::Fixups FixupKind;
+
+ switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+ case MCSymbolRefExpr::VK_Mips_GPREL:
+ FixupKind = Mips::fixup_Mips_GPREL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL:
+ FixupKind = Mips::fixup_Mips_CALL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT:
+ FixupKind = Mips::fixup_Mips_GOT_Local;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI:
+ FixupKind = Mips::fixup_Mips_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO:
+ FixupKind = Mips::fixup_Mips_LO16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ FixupKind = Mips::fixup_Mips_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM:
+ FixupKind = Mips::fixup_Mips_TLSLDM;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:
+ FixupKind = Mips::fixup_Mips_DTPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:
+ FixupKind = Mips::fixup_Mips_DTPREL_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ FixupKind = Mips::fixup_Mips_GOTTPREL;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ FixupKind = Mips::fixup_Mips_TPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
+ FixupKind = Mips::fixup_Mips_TPREL_LO;
+ break;
+ default:
+ break;
+ } // switch
+
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getMemEncoding - Return binary encoding of memory related operand.
+/// If the offset operand requires relocation, record the relocation.
+unsigned
+MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups);
+
+ return (OffBits & 0xFFFF) | RegBits;
+}
+
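A worked example for the memory-operand encoding above, using the register numbering defined earlier in this patch (the operand values are invented):

  // base = SP (register number 29), offset = 8
  // RegBits = 29 << 16 = 0x1D0000, OffBits = 0x0008  ->  encoding 0x001D0008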
+unsigned
+MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
+}
+
+// FIXME: should be called getMSBEncoding
+//
+unsigned
+MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo-1).isImm());
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+
+ return Position + Size - 1;
+}
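A brief sanity check of the two size encodings above (numbers invented): for an ins-style operand pair with position 4 and size 8, getSizeInsEncoding returns 4 + 8 - 1 = 11, the most significant bit of the inserted field (hence the FIXME), while getSizeExtEncoding for a size operand of 8 returns 8 - 1 = 7.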
+
+#include "MipsGenMCCodeEmitter.inc"
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 1f9e3ddf13c8..3c544f6aec90 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMCTargetDesc.h"
#include "MipsMCAsmInfo.h"
+#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
@@ -20,6 +20,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -63,19 +64,24 @@ static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default)
+ if (CM == CodeModel::JITDefault)
+ RM = Reloc::Static;
+ else if (RM == Reloc::Default)
RM = Reloc::PIC_;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
- return new MipsInstPrinter(MAI);
+ return new MipsInstPrinter(MAI, MII, MRI);
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
@@ -110,7 +116,8 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
+ createMipsMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
@@ -120,25 +127,31 @@ extern "C" void LLVMInitializeMipsTargetMC() {
createMipsMCRegisterInfo);
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
+ createMipsMCCodeEmitterEB);
TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEL);
TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEB);
TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEL);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
+ createMCStreamer);
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
+ createMipsAsmBackendEB32);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget,
+ createMipsAsmBackendEL32);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target,
+ createMipsAsmBackendEB64);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
+ createMipsAsmBackendEL64);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 7a0042ad889e..547ccddd78ea 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -14,25 +14,40 @@
#ifndef MIPSMCTARGETDESC_H
#define MIPSMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
-class MCInstrInfo;
class MCCodeEmitter;
class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_ostream;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
extern Target TheMips64Target;
extern Target TheMips64elTarget;
-MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT);
-MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT);
+MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit);
} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index d72693c0940d..168635c96beb 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -15,9 +15,9 @@ TARGET = Mips
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
- MipsGenSubtargetInfo.inc
+ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bacecf20b920..bafadc8f25f6 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,8 +21,6 @@
namespace llvm {
class MipsTargetMachine;
class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 39c2c164f664..cbebe84a1805 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
+//===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -63,7 +63,7 @@ def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg]>;
+ [FeatureMips32, FeatureSEInReg, FeatureSwap]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureGP64Bit, FeatureFP64Bit,
@@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips32r1", [FeatureMips32]>;
-def : Proc<"4ke", [FeatureMips32r2]>;
-def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 9758f4bb8907..427e8d97ad9c 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -25,68 +25,48 @@ def uimm16_64 : Operand<i64> {
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() - 32);
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
}]>;
-// imm32_63 predicate - True if imm is in range [32, 63].
-def imm32_63 : ImmLeaf<i64,
- [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
- Subtract32>;
+// shamt must fit in 6 bits.
+def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
// Shifts
-class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm,
- SDNode OpNode, PatFrag PF>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt, shamt_64:$shamt),
- !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set CPU64Regs:$rd, (OpNode CPU64Regs:$rt, (i64 PF:$shamt)))],
- IIAlu> {
- let rs = _rs;
-}
-
-class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set CPU64Regs:$rd, (OpNode CPU64Regs:$rt, CPU64Regs:$rs))], IIAlu> {
- let shamt = _shamt;
-}
+// 64-bit shift instructions.
+class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode>:
+ shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt,
+ CPU64Regs>;
// Mul, Div
-let rd = 0, shamt = 0, Defs = [HI64, LO64] in {
- let isCommutable = 1 in
- class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"), [], itin>;
-
- class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op CPU64Regs:$rs, CPU64Regs:$rt)], itin>;
+class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>:
+ Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
+class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
+
+multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]>;
}
-// Move from Hi/Lo
-let shamt = 0 in {
-let rs = 0, rt = 0 in
-class MoveFromLOHI64<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins),
- !strconcat(instr_asm, "\t$rd"), [], IIHiLo>;
-
-let rt = 0, rd = 0 in
-class MoveToLOHI64<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs),
- !strconcat(instr_asm, "\t$rs"), [], IIHiLo>;
+multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
+ def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64]>;
}
-// Count Leading Ones/Zeros in Word
-class CountLeading64<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
- Requires<[HasBitCount]> {
- let shamt = 0;
- let rt = rd;
+let usesCustomInserter = 1, Predicates = [HasMips64] in {
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">;
}
//===----------------------------------------------------------------------===//
@@ -101,6 +81,7 @@ def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
+def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>;
/// Arithmetic Instructions (3-Operand, R-Type)
def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
@@ -113,26 +94,21 @@ def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
/// Shift Instructions
-def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>;
-def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>;
-def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>;
-def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>;
-def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>;
-def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>;
-def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>;
-def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>;
-def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>;
+def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>;
+def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>;
+def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
+def DSLLV : shift_rotate_reg<0x24, 0x00, "dsllv", shl, CPU64Regs>;
+def DSRLV : shift_rotate_reg<0x26, 0x00, "dsrlv", srl, CPU64Regs>;
+def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>;
// Rotate Instructions
let Predicates = [HasMips64r2] in {
- def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>;
- def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr,
- imm32_63>;
- def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>;
+ def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
+ def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
}
/// Load and Store Instructions
-/// aligned
+/// aligned
defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>;
@@ -154,7 +130,14 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
defm ULD : LoadM64<0x37, "uld", load_u, 1>;
defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+/// Load-linked, Store-conditional
+def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>;
+def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>;
+def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>;
+def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>;
+
/// Jump and Branch Instructions
+def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>;
def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
@@ -162,46 +145,104 @@ def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>;
def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
+
/// Multiply and Divide Instructions.
-def DMULT : Mul64<0x1c, "dmult", IIImul>;
-def DMULTu : Mul64<0x1d, "dmultu", IIImul>;
+def DMULT : Mult64<0x1c, "dmult", IIImul>;
+def DMULTu : Mult64<0x1d, "dmultu", IIImul>;
def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
-let Defs = [HI64] in
- def MTHI64 : MoveToLOHI64<0x11, "mthi">;
-let Defs = [LO64] in
- def MTLO64 : MoveToLOHI64<0x13, "mtlo">;
+def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>;
+def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>;
+def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>;
+def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>;
-let Uses = [HI64] in
- def MFHI64 : MoveFromLOHI64<0x10, "mfhi">;
-let Uses = [LO64] in
- def MFLO64 : MoveFromLOHI64<0x12, "mflo">;
+/// Sign Ext In Register Instructions.
+def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>;
+def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>;
/// Count Leading
-def DCLZ : CountLeading64<0x24, "dclz",
- [(set CPU64Regs:$rd, (ctlz CPU64Regs:$rs))]>;
-def DCLO : CountLeading64<0x25, "dclo",
- [(set CPU64Regs:$rd, (ctlz (not CPU64Regs:$rs)))]>;
+def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>;
+def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
+
+/// Double Word Swap Bytes/HalfWords
+def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
+def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+
+let Uses = [SP_64] in
+def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+ Requires<[IsN64]>;
+
+def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+
+def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DINS : InsBase<7, "dins", CPU64Regs>;
+
+def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+
+def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
-// Small immediates
-def : Pat<(i64 immSExt16:$in),
- (DADDiu ZERO_64, imm:$in)>;
-def : Pat<(i64 immZExt16:$in),
- (ORi64 ZERO_64, imm:$in)>;
-
-// zextloadi32_u
-def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>,
- Requires<[IsN64]>;
-def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>,
- Requires<[NotN64]>;
+// extended loads
+let Predicates = [NotN64] in {
+ def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+ def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+ def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+ def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+ def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+ def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
+ def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+ def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+}
// hi/lo relocs
-def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
+def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
+def : WrapperPat<texternalsym, DADDiu, CPU64Regs>;
+def : WrapperPat<tblockaddress, DADDiu, CPU64Regs>;
+def : WrapperPat<tjumptable, DADDiu, CPU64Regs>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, CPU64Regs>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -212,3 +253,21 @@ defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
+
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
+
+// truncate
+def : Pat<(i32 (trunc CPU64Regs:$src)),
+ (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+
+// 32-to-64-bit extension
+def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+
+// Sign extend in register
+def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+
+// bswap pattern
+def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
new file mode 100644
index 000000000000..dc8fbd0d0370
--- /dev/null
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -0,0 +1,153 @@
+//===-- MipsAnalyzeImmediate.cpp - Analyze Immediates ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
+#include "Mips.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+MipsAnalyzeImmediate::Inst::Inst(unsigned O, unsigned I) : Opc(O), ImmOpnd(I) {}
+
+// Add I to the instruction sequences.
+void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
+  // Add an instruction sequence consisting of just I.
+ if (SeqLs.empty()) {
+ SeqLs.push_back(InstSeq(1, I));
+ return;
+ }
+
+ for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter)
+ Iter->push_back(I);
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ unsigned Shamt = CountTrailingZeros_64(Imm);
+ GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
+ AddInstr(SeqLs, Inst(SLL, Shamt));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
+
+ // Do nothing if Imm is 0.
+ if (!MaskedImm)
+ return;
+
+ // A single ADDiu will do if RemSize <= 16.
+ if (RemSize <= 16) {
+ AddInstr(SeqLs, Inst(ADDiu, MaskedImm));
+ return;
+ }
+
+  // Shift if the lower 16 bits are cleared.
+ if (!(Imm & 0xffff)) {
+ GetInstSeqLsSLL(Imm, RemSize, SeqLs);
+ return;
+ }
+
+ GetInstSeqLsADDiu(Imm, RemSize, SeqLs);
+
+ // If bit 15 is cleared, it doesn't make a difference whether the last
+ // instruction is an ADDiu or ORi. In that case, do not call GetInstSeqLsORi.
+ if (Imm & 0x8000) {
+ InstSeqLs SeqLsORi;
+ GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
+ SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+ }
+}
+
+// Replace a ADDiu & SLL pair with a LUi.
+// e.g. the following two instructions
+// ADDiu 0x0111
+// SLL 18
+// are replaced with
+// LUi 0x444
+void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) {
+ // Check if the first two instructions are ADDiu and SLL and the shift amount
+ // is at least 16.
+ if ((Seq.size() < 2) || (Seq[0].Opc != ADDiu) ||
+ (Seq[1].Opc != SLL) || (Seq[1].ImmOpnd < 16))
+ return;
+
+ // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit.
+ int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd);
+ int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16);
+
+ if (!isInt<16>(ShiftedImm))
+ return;
+
+ // Replace the first instruction and erase the second.
+ Seq[0].Opc = LUi;
+ Seq[0].ImmOpnd = (unsigned)(ShiftedImm & 0xffff);
+ Seq.erase(Seq.begin() + 1);
+}
+
+void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
+ InstSeqLs::iterator ShortestSeq = SeqLs.end();
+ // The length of an instruction sequence is at most 7.
+ unsigned ShortestLength = 8;
+
+ for (InstSeqLs::iterator S = SeqLs.begin(); S != SeqLs.end(); ++S) {
+ ReplaceADDiuSLLWithLUi(*S);
+ assert(S->size() <= 7);
+
+ if (S->size() < ShortestLength) {
+ ShortestSeq = S;
+ ShortestLength = S->size();
+ }
+ }
+
+ Insts.clear();
+ Insts.append(ShortestSeq->begin(), ShortestSeq->end());
+}
+
+const MipsAnalyzeImmediate::InstSeq
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
+ bool LastInstrIsADDiu) {
+ this->Size = Size;
+
+ if (Size == 32) {
+ ADDiu = Mips::ADDiu;
+ ORi = Mips::ORi;
+ SLL = Mips::SLL;
+ LUi = Mips::LUi;
+ } else {
+ ADDiu = Mips::DADDiu;
+ ORi = Mips::ORi64;
+ SLL = Mips::DSLL;
+ LUi = Mips::LUi64;
+ }
+
+ InstSeqLs SeqLs;
+
+ // Get the list of instruction sequences.
+ if (LastInstrIsADDiu | !Imm)
+ GetInstSeqLsADDiu(Imm, Size, SeqLs);
+ else
+ GetInstSeqLs(Imm, Size, SeqLs);
+
+ // Set Insts to the shortest instruction sequence.
+ GetShortestSeq(SeqLs, Insts);
+
+ return Insts;
+}
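Hand-tracing the routines above for a sample 32-bit immediate (the value is invented and the trace is worth verifying): Analyze(0x12345678, 32, false) first builds the sequence ADDiu 0x48d; SLL 18; ADDiu 0x5678, and ReplaceADDiuSLLWithLUi then folds the leading ADDiu/SLL pair into a LUi, giving:

  // LUi   0x1234    ; 0x1234 << 16        = 0x12340000
  // ADDiu 0x5678    ; 0x12340000 + 0x5678 = 0x12345678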
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
new file mode 100644
index 000000000000..a094ddae45de
--- /dev/null
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -0,0 +1,63 @@
+//===-- MipsAnalyzeImmediate.h - Analyze Immediates ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPS_ANALYZE_IMMEDIATE_H
+#define MIPS_ANALYZE_IMMEDIATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+ class MipsAnalyzeImmediate {
+ public:
+ struct Inst {
+ unsigned Opc, ImmOpnd;
+ Inst(unsigned Opc, unsigned ImmOpnd);
+ };
+ typedef SmallVector<Inst, 7 > InstSeq;
+
+ /// Analyze - Get an instruction sequence to load immediate Imm. The last
+ /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
+ /// true.
+ const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+ private:
+ typedef SmallVector<InstSeq, 5> InstSeqLs;
+
+ /// AddInstr - Add I to all instruction sequences in SeqLs.
+ void AddInstr(InstSeqLs &SeqLs, const Inst &I);
+
+ /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
+ /// load immediate Imm.
+ void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
+ /// load immediate Imm.
+ void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to
+ /// load immediate Imm.
+ void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLs - Get instruction sequences to load immediate Imm.
+ void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
+ void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
+
+ /// GetShortestSeq - Find the shortest instruction sequence in SeqLs and
+ /// return it in Insts.
+ void GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts);
+
+ unsigned Size;
+ unsigned ADDiu, ORi, SLL, LUi;
+ InstSeq Insts;
+ };
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 0e826812d076..8206cfc15704 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//===-- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,10 +16,12 @@
#include "MipsAsmPrinter.h"
#include "Mips.h"
#include "MipsInstrInfo.h"
-#include "MipsMachineFunction.h"
-#include "MipsMCInstLower.h"
-#include "MipsMCSymbolRefExpr.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -27,55 +29,125 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Instructions.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
-static bool isUnalignedLoadStore(unsigned Opc) {
- return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu ||
- Opc == Mips::USW || Opc == Mips::USH ||
- Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
- Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitInstruction(TmpInst);
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+}
+
+bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ MipsFI = MF.getInfo<MipsFunctionInfo>();
+ AsmPrinter::runOnMachineFunction(MF);
+ return true;
}
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
-
if (MI->isDebugValue()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
PrintDebugValueComment(MI, OS);
return;
}
- MipsMCInstLower MCInstLowering(Mang, *MF, *this);
unsigned Opc = MI->getOpcode();
MCInst TmpInst0;
- MCInstLowering.Lower(MI, TmpInst0);
-
- // Enclose unaligned load or store with .macro & .nomacro directives.
- if (isUnalignedLoadStore(Opc)) {
- MCInst Directive;
- Directive.setOpcode(Mips::MACRO);
- OutStreamer.EmitInstruction(Directive);
- OutStreamer.EmitInstruction(TmpInst0);
- Directive.setOpcode(Mips::NOMACRO);
- OutStreamer.EmitInstruction(Directive);
+ SmallVector<MCInst, 4> MCInsts;
+
+ switch (Opc) {
+ case Mips::ULW:
+ case Mips::ULH:
+ case Mips::ULHu:
+ case Mips::USW:
+ case Mips::USH:
+ case Mips::ULW_P8:
+ case Mips::ULH_P8:
+ case Mips::ULHu_P8:
+ case Mips::USW_P8:
+ case Mips::USH_P8:
+ case Mips::ULD:
+ case Mips::ULW64:
+ case Mips::ULH64:
+ case Mips::ULHu64:
+ case Mips::USD:
+ case Mips::USW64:
+ case Mips::USH64:
+ case Mips::ULD_P8:
+ case Mips::ULW64_P8:
+ case Mips::ULH64_P8:
+ case Mips::ULHu64_P8:
+ case Mips::USD_P8:
+ case Mips::USW64_P8:
+ case Mips::USH64_P8: {
+ if (OutStreamer.hasRawTextSupport()) {
+ EmitInstrWithMacroNoAT(MI);
+ return;
+ }
+
+ MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts);
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+
return;
}
+ case Mips::CPRESTORE: {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
+ int64_t Offset = MO.getImm();
+
+ if (OutStreamer.hasRawTextSupport()) {
+ if (!isInt<16>(Offset)) {
+ EmitInstrWithMacroNoAT(MI);
+ return;
+ }
+ } else {
+ MCInstLowering.LowerCPRESTORE(Offset, MCInsts);
+
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+ return;
+ }
+
+ break;
+ }
+ case Mips::SETGP01: {
+ MCInstLowering.LowerSETGP01(MI, MCInsts);
+
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+
+ return;
+ }
+ default:
+ break;
+ }
+
+ MCInstLowering.Lower(MI, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
}
@@ -138,7 +210,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
if (Mips::CPURegsRegisterClass->contains(Reg))
break;
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
if (Mips::AFGR64RegisterClass->contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -153,7 +225,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -177,7 +249,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
O << "0x";
for (int i = 7; i >= 0; i--)
- O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
}
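A minimal stand-in for the loop above, assuming a hypothetical Value; it shows the fixed-width, zero-padded eight-digit rendering the per-nibble loop guarantees.

#include <cstdio>

int main() {
  unsigned Value = 0xBADF00D;  // prints as 0x0badf00d, always eight digits
  std::printf("0x");
  for (int i = 7; i >= 0; i--)
    std::printf("%x", (Value >> (i * 4)) & 0xF);
  std::printf("\n");
  return 0;
}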
//===----------------------------------------------------------------------===//
@@ -192,10 +264,11 @@ void MipsAsmPrinter::emitFrameDirective() {
unsigned returnReg = RI.getRARegister();
unsigned stackSize = MF->getFrameInfo()->getStackSize();
- OutStreamer.EmitRawText("\t.frame\t$" +
- Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() +
"," + Twine(stackSize) + ",$" +
- Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
+ StringRef(MipsInstPrinter::getRegisterName(returnReg)).lower());
}
/// Emit Set directives.
@@ -205,27 +278,49 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
- default: break;
+ default: llvm_unreachable("Unknown Mips ABI");
}
-
- llvm_unreachable("Unknown Mips ABI");
- return NULL;
}
void MipsAsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
OutStreamer.EmitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
/// the first basic block in the function.
void MipsAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+
emitFrameDirective();
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printSavedRegsBitmask(OS);
- OutStreamer.EmitRawText(OS.str());
+ bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
+ Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() &&
+ MipsFI->globalBaseRegFixed();
+
+ if (OutStreamer.hasRawTextSupport()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+
+ // Emit .cpload directive if needed.
+ if (EmitCPLoad)
+ OutStreamer.EmitRawText(StringRef("\t.cpload\t$25"));
+
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ } else if (EmitCPLoad) {
+ SmallVector<MCInst, 4> MCInsts;
+ MCInstLowering.LowerCPLOAD(MCInsts);
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+ }
}
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
@@ -234,11 +329,15 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
// There are instruction for this macros, but they must
// always be at the function end, and we can't emit and
// break with BB logic.
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
- OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
-}
+ if (OutStreamer.hasRawTextSupport()) {
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+ }
+}
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
@@ -262,24 +361,24 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// If there isn't exactly one predecessor, it can't be a fall through.
MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
++PI2;
-
+
if (PI2 != MBB->pred_end())
- return false;
+ return false;
// The predecessor has to be immediately before this block.
if (!Pred->isLayoutSuccessor(MBB))
return false;
-
+
// If the block is completely empty, then it definitely does fall through.
if (Pred->empty())
return true;
-
+
// Otherwise, check the last instruction.
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
- return !I->getDesc().isBarrier();
+ return !I->isBarrier();
}
// Print out an operand for an inline asm expression.
@@ -300,7 +399,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
-
+
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
@@ -335,7 +434,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch (MO.getType()) {
case MachineOperand::MO_Register:
O << '$'
- << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
+ << StringRef(MipsInstPrinter::getRegisterName(MO.getReg())).lower();
break;
case MachineOperand::MO_Immediate:
@@ -420,18 +519,23 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// FIXME: Use SwitchSection.
// Tell the assembler which ABI we are using
- OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.section .mdebug." +
+ Twine(getCurrentABIString()));
// TODO: handle O64 ABI
- if (Subtarget->isABI_EABI()) {
- if (Subtarget->isGP32bit())
- OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
- else
- OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ if (OutStreamer.hasRawTextSupport()) {
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
}
// return to previous section
- OutStreamer.EmitRawText(StringRef("\t.previous"));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
}
MachineLocation
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 16461ff1fbb0..562bf9ce0092 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//===-- MipsAsmPrinter.h - Mips LLVM Assembly Printer ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#ifndef MIPSASMPRINTER_H
#define MIPSASMPRINTER_H
+#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Compiler.h"
@@ -22,16 +24,22 @@
namespace llvm {
class MCStreamer;
class MachineInstr;
-class raw_ostream;
class MachineBasicBlock;
class Module;
+class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
- const MipsSubtarget *Subtarget;
-
+
+ void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+
public:
+
+ const MipsSubtarget *Subtarget;
+ const MipsFunctionInfo *MipsFI;
+ MipsMCInstLower MCInstLowering;
+
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
+ : AsmPrinter(TM, Streamer), MCInstLowering(*this) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
}
@@ -39,6 +47,8 @@ public:
return "Mips Assembly Printer";
}
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
void EmitInstruction(const MachineInstr *MI);
void printSavedRegsBitmask(raw_ostream &O);
void printHex32(unsigned int Value, raw_ostream &O);
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 0ae4ef6fbad4..4b7e1d37662c 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
+//===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,12 +35,18 @@ def RetCC_MipsO32 : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsN : CallingConv<[
- // FIXME: Handle byval, complex and float double parameters.
+ // Handles byval parameters.
+ CCIfByVal<CCCustom<"CC_Mips64Byval">>,
- // Promote i8/i16/i32 arguments to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
// Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
+ T0, T1, T2, T3],
+ [F12, F13, F14, F15,
+ F16, F17, F18, F19]>>,
+
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64],
[D12_64, D13_64, D14_64, D15_64,
@@ -59,13 +65,30 @@ def CC_MipsN : CallingConv<[
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
- CCIfType<[f32], CCAssignToStack<4, 8>>
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
-def RetCC_MipsN : CallingConv<[
- // FIXME: Handle complex and float double return values.
+// N32/64 variable arguments.
+// All arguments are passed in integer registers.
+def CC_MipsN_VarArg : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCCustom<"CC_Mips64Byval">>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
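A minimal, self-contained sketch of what CCAssignToStack<4, 8> implies for consecutive i32/f32 stack arguments; the allocate helper below is a simplified stand-in for the allocator, not LLVM's CCState.

#include <cassert>

// Simplified stand-in for stack-slot allocation with a given size/alignment.
static unsigned allocate(unsigned &Offset, unsigned Size, unsigned Align) {
  Offset = (Offset + Align - 1) / Align * Align;  // align the slot start
  unsigned Slot = Offset;
  Offset += Size;
  return Slot;
}

int main() {
  unsigned Offset = 0;
  unsigned First = allocate(Offset, 4, 8);   // first i32 stack argument
  unsigned Second = allocate(Offset, 4, 8);  // second i32 stack argument
  assert(First == 0 && Second == 8);         // each slot occupies a full doubleword
  return 0;
}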
+def RetCC_MipsN : CallingConv<[
// i32 are returned in registers V0, V1
CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
@@ -137,3 +160,20 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
CCDelegateTo<RetCC_MipsO32>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
+ D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
+ (sequence "S%u_64", 7, 0))>;
+
+def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
+ GP_64, (sequence "S%u_64", 7, 0))>;
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 23fabe315cf5..7d819026da96 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===//
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,18 +18,20 @@
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,8 +39,6 @@
#include <iomanip>
#endif
-#include "llvm/CodeGen/MachineOperand.h"
-
using namespace llvm;
STATISTIC(NumEmitted, "Number of machine instructions emitted");
@@ -66,9 +66,9 @@ class MipsCodeEmitter : public MachineFunctionPass {
public:
MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
MachineFunctionPass(ID), JTI(0),
- II((const MipsInstrInfo *) tm.getInstrInfo()),
- TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
+ II((const MipsInstrInfo *) tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
}
bool runOnMachineFunction(MachineFunction &MF);
@@ -80,7 +80,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
void emitInstruction(const MachineInstr &MI);
@@ -91,7 +91,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
+ bool MayNeedFarStub) const;
void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
@@ -105,9 +105,22 @@ class MipsCodeEmitter : public MachineFunctionPass {
unsigned getRelocation(const MachineInstr &MI,
const MachineOperand &MO) const;
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getBranchTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const;
unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ int emitULW(const MachineInstr &MI);
+ int emitUSW(const MachineInstr &MI);
+ int emitULH(const MachineInstr &MI);
+ int emitULHu(const MachineInstr &MI);
+ int emitUSH(const MachineInstr &MI);
+
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
};
}
@@ -132,7 +145,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -149,30 +162,50 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
if (Form == MipsII::FrmJ)
return Mips::reloc_mips_26;
if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.getDesc().isBranch())
- return Mips::reloc_mips_branch;
+ && MI.isBranch())
+ return Mips::reloc_mips_pc16;
if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
return Mips::reloc_mips_hi;
return Mips::reloc_mips_lo;
}
+unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unexpected jump target operand kind.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ return 0;
+}
+
unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
assert(MI.getOperand(OpNo).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16;
- return
- (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
+ return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
}
unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// size is encoded as size-1.
return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
}
unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// size is encoded as pos+size-1.
return getMachineOpValue(MI, MI.getOperand(OpNo-1)) +
getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
@@ -181,14 +214,20 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
+ const MachineOperand &MO) const {
if (MO.isReg())
- return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getMipsRegisterNumbering(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
+ else if (MO.isGlobal()) {
+ if (MI.getOpcode() == Mips::ULW || MI.getOpcode() == Mips::USW ||
+ MI.getOpcode() == Mips::ULH || MI.getOpcode() == Mips::ULHu)
+ emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 4);
+ else if (MI.getOpcode() == Mips::USH)
+ emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 8);
+ else
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ } else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
else if (MO.isCPI())
emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
@@ -202,9 +241,18 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
}
void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const {
+ bool MayNeedFarStub) const {
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0, MayNeedFarStub));
+ const_cast<GlobalValue *>(GV), 0,
+ MayNeedFarStub));
+}
+
+void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV,
+ unsigned Reloc, int Offset) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, false));
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset() + Offset,
+ Reloc, const_cast<GlobalValue *>(GV), 0, false));
}
void MipsCodeEmitter::
@@ -225,11 +273,108 @@ emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
}
void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
+ unsigned Reloc) const {
MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
Reloc, BB));
}
+int MipsCodeEmitter::emitUSW(const MachineInstr &MI) {
+ unsigned src = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ // swr src, offset(base)
+ // swl src, offset+3(base)
+ MCE.emitWordLE(
+ (0x2e << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x2a << 26) | (base << 21) | (src << 16) | ((offset+3) & 0xffff));
+ return 2;
+}
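A minimal sketch evaluating the two words emitUSW would build for a hypothetical usw of register 4 ($a0) based on register 29 ($sp) with offset 0; only the bit-packing arithmetic from the code above is checked.

#include <cassert>
#include <cstdint>

int main() {
  unsigned src = 4, base = 29, offset = 0;  // hypothetical $a0, $sp, offset 0
  uint32_t swr = (0x2eu << 26) | (base << 21) | (src << 16) | (offset & 0xffff);
  uint32_t swl = (0x2au << 26) | (base << 21) | (src << 16) | ((offset + 3) & 0xffff);
  assert(swr == 0xBBA40000);  // swr $a0, 0($sp)
  assert(swl == 0xABA40003);  // swl $a0, 3($sp)
  return 0;
}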
+
+int MipsCodeEmitter::emitULW(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ if (dst != base) {
+ // lwr dst, offset(base)
+ // lwl dst, offset+3(base)
+ MCE.emitWordLE(
+ (0x26 << 26) | (base << 21) | (dst << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x22 << 26) | (base << 21) | (dst << 16) | ((offset+3) & 0xffff));
+ return 2;
+ } else {
+ // lwr at, offset(base)
+ // lwl at, offset+3(base)
+ // addu dst, at, $zero
+ MCE.emitWordLE(
+ (0x26 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x22 << 26) | (base << 21) | (at << 16) | ((offset+3) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (at << 21) | (0x0 << 16) | (dst << 11) | (0x0 << 6) | 0x21);
+ return 3;
+ }
+}
+
+int MipsCodeEmitter::emitUSH(const MachineInstr &MI) {
+ unsigned src = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // sb src, offset(base)
+ // srl at,src,8
+ // sb at, offset+1(base)
+ MCE.emitWordLE(
+ (0x28 << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (src << 16) | (at << 11) | (0x8 << 6) | 0x2);
+ MCE.emitWordLE(
+ (0x28 << 26) | (base << 21) | (at << 16) | ((offset+1) & 0xffff));
+ return 3;
+}
+
+int MipsCodeEmitter::emitULH(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // lbu at, offset(base)
+ // lb dst, offset+1(base)
+ // sll dst,dst,8
+ // or dst,dst,at
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x20 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
+ MCE.emitWordLE(
+ (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
+ return 4;
+}
+
+int MipsCodeEmitter::emitULHu(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // lbu at, offset(base)
+ // lbu dst, offset+1(base)
+ // sll dst,dst,8
+ // or dst,dst,at
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
+ MCE.emitWordLE(
+ (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
+ return 4;
+}
+
void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
@@ -239,11 +384,27 @@ void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
return;
- ++NumEmitted; // Keep track of the # of mi's emitted
switch (MI.getOpcode()) {
+ case Mips::USW:
+ NumEmitted += emitUSW(MI);
+ break;
+ case Mips::ULW:
+ NumEmitted += emitULW(MI);
+ break;
+ case Mips::ULH:
+ NumEmitted += emitULH(MI);
+ break;
+ case Mips::ULHu:
+ NumEmitted += emitULHu(MI);
+ break;
+ case Mips::USH:
+ NumEmitted += emitUSH(MI);
+ break;
+
default:
emitWordLE(getBinaryCodeForInstr(MI));
+ ++NumEmitted; // Keep track of the # of mi's emitted
break;
}
@@ -259,7 +420,7 @@ void MipsCodeEmitter::emitWordLE(unsigned Word) {
/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
/// code to the specified MCE object.
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
return new MipsCodeEmitter(TM, JCE);
}
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
new file mode 100644
index 000000000000..075a3e807b1f
--- /dev/null
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -0,0 +1,194 @@
+//===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the Conditional Moves implementation.
+//
+//===----------------------------------------------------------------------===//
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
+// conditional move instructions.
+// cond:int, data:int
+class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct,
+ string instr_asm> :
+ FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+ let shamt = 0;
+ let Constraints = "$F = $rd";
+}
+
+// cond:int, data:float
+class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt,
+ bits<6> func, string instr_asm> :
+ FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> {
+ bits<5> rt;
+ let ft = rt;
+ let Constraints = "$F = $fd";
+}
+
+// cond:float, data:int
+class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf,
+ string instr_asm> :
+ FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (cmov RC:$rs, RC:$F))]> {
+ let cc = 0;
+ let Uses = [FCR31];
+ let Constraints = "$F = $rd";
+}
+
+// cond:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (cmov RC:$fs, RC:$F))]> {
+ let cc = 0;
+ let Uses = [FCR31];
+ let Constraints = "$F = $fd";
+}
+
+// select patterns
+multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction SLTOp,
+ Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+}
+
+multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction XOROp> {
+ def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
+ Instruction XOROp> {
+ def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+ def : Pat<(select (i32 (setne CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
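A minimal, self-contained sketch of the mapping the MovzPats0/MovnPats multiclasses above encode, with made-up values: a select on an integer comparison becomes a conditional move keyed on whether an slt or xor result is zero.

#include <cassert>

int main() {
  int a = 5, b = 3, T = 111, F = 222;
  int c1 = (a < b) ? 1 : 0;      // slt c1, a, b
  int movz = (c1 == 0) ? T : F;  // movz: take T when c1 == 0 (setge case)
  int c2 = a ^ b;                // xor c2, a, b
  int movn = (c2 != 0) ? T : F;  // movn: take T when c2 != 0 (setne case)
  assert(movz == ((a >= b) ? T : F));
  assert(movn == ((a != b) ? T : F));
  return 0;
}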
+
+// Instantiation of instructions.
+def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
+let Predicates = [HasMips64] in {
+ def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
+ def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz">;
+ def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz">;
+}
+
+def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
+let Predicates = [HasMips64] in {
+ def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
+ def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn">;
+ def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn">;
+}
+
+def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
+def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
+ Requires<[HasMips64]>;
+
+def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
+def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
+ Requires<[HasMips64]>;
+
+let Predicates = [NotFP64bit] in {
+ def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
+ def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
+}
+let Predicates = [IsFP64bit] in {
+ def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
+ def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d">;
+ def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">;
+ def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d">;
+}
+
+def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
+def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
+ Requires<[HasMips64]>;
+
+def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
+def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
+ Requires<[HasMips64]>;
+
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+
+let Predicates = [NotFP64bit] in {
+ def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+let Predicates = [IsFP64bit] in {
+ def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+// Instantiation of conditional move patterns.
+defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats0<CPU64Regs, CPU64Regs, MOVZ_I_I64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, CPU64Regs, MOVZ_I_I64, XOR>;
+ defm : MovzPats1<CPU64Regs, CPURegs, MOVZ_I64_I, XOR64>;
+ defm : MovzPats1<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XOR64>;
+}
+
+defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
+ defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
+ defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
+}
+
+defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
+defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
+ defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
+}
+
+let Predicates = [NotFP64bit] in {
+ defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
+ defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
+}
+let Predicates = [IsFP64bit] in {
+ defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, FGR64, MOVZ_I_D64, XOR>;
+ defm : MovzPats1<CPU64Regs, FGR64, MOVZ_I64_D64, XOR64>;
+ defm : MovnPats<CPURegs, FGR64, MOVN_I_D64, XOR>;
+ defm : MovnPats<CPU64Regs, FGR64, MOVN_I64_D64, XOR64>;
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index be3b7a02ec31..debf2f1b85c1 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,4 +1,4 @@
-//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
LastFiller = MBB.end();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
@@ -105,8 +105,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
MBB.splice(llvm::next(I), &MBB, D);
++UsefulSlots;
- }
- else
+ } else
BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
// Record the filler instruction that filled the delay slot.
@@ -146,7 +145,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
|| I->isInlineAsm()
|| I->isLabel()
|| FI == LastFiller
- || I->getDesc().isPseudo()
+ || I->isPseudo()
//
// Should not allow:
// ERET, DERET or WAIT, PAUSE. Need to add these to instruction
@@ -167,23 +166,21 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
}
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
- bool &sawLoad,
- bool &sawStore,
+ bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses) {
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- MCInstrDesc MCID = candidate->getDesc();
// Loads or stores cannot be moved past a store to the delay slot
- // and stores cannot be moved past a load.
- if (MCID.mayLoad()) {
+ // and stores cannot be moved past a load.
+ if (candidate->mayLoad()) {
if (sawStore)
return true;
sawLoad = true;
}
- if (MCID.mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -191,7 +188,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- assert((!MCID.isCall() && !MCID.isReturn()) &&
+ assert((!candidate->isCall() && !candidate->isReturn()) &&
"Cannot put calls or returns in delay slot.");
for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
@@ -221,11 +218,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
- unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
-
- // Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MCID.isCall())
+ unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
+
+ // Add RA to RegDefs to prevent users of RA from going into delay slot.
+ if (MI->isCall())
RegDefs.insert(Mips::RA);
for (unsigned i = 0; i != e; ++i) {
@@ -247,7 +244,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
if (RegSet.count(Reg))
return true;
// check Aliased Registers
- for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
*Alias; ++Alias)
if (RegSet.count(*Alias))
return true;
diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp
index 03d922fe7cd6..119d1a824688 100644
--- a/lib/Target/Mips/MipsEmitGPRestore.cpp
+++ b/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -1,4 +1,4 @@
-//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction----------------===//
+//===-- MipsEmitGPRestore.cpp - Emit GP Restore Instruction ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -44,11 +44,14 @@ namespace {
} // end of anonymous namespace
bool Inserter::runOnMachineFunction(MachineFunction &F) {
- if (TM.getRelocationModel() != Reloc::PIC_)
+ MipsFunctionInfo *MipsFI = F.getInfo<MipsFunctionInfo>();
+
+ if ((TM.getRelocationModel() != Reloc::PIC_) ||
+ (!MipsFI->globalBaseRegFixed()))
return false;
bool Changed = false;
- int FI = F.getInfo<MipsFunctionInfo>()->getGPFI();
+ int FI = MipsFI->getGPFI();
for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
MFI != MFE; ++MFI) {
@@ -60,7 +63,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
if (MBB.isLandingPad()) {
// Find EH_LABEL first.
for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
-
+
// Insert lw.
++I;
DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
@@ -81,7 +84,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
.addImm(0);
Changed = true;
}
- }
+ }
return Changed;
}
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index a622258a4dcb..baeae97a4f52 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -1,4 +1,4 @@
-//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//===-- MipsExpandPseudo.cpp - Expand Pseudo Instructions ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -64,16 +64,22 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
const MCInstrDesc& MCid = I->getDesc();
switch(MCid.getOpcode()) {
- default:
+ default:
++I;
continue;
+ case Mips::SETGP2:
+ // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9"
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu),
+ I->getOperand(0).getReg())
+ .addReg(Mips::V0).addReg(I->getOperand(1).getReg());
+ break;
case Mips::BuildPairF64:
ExpandBuildPairF64(MBB, I);
break;
case Mips::ExtractElementF64:
ExpandExtractElementF64(MBB, I);
break;
- }
+ }
// delete original instr
MBB.erase(I++);
@@ -84,12 +90,12 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
}
void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator I) {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
- const unsigned* SubReg =
+ const uint16_t* SubReg =
TM.getRegisterInfo()->getSubRegisters(DstReg);
// mtc1 Lo, $fp
@@ -105,12 +111,12 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
unsigned N = I->getOperand(2).getImm();
const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
DebugLoc dl = I->getDebugLoc();
- const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+ const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
}
-/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// createMipsExpandPseudoPass - Returns a pass that expands pseudo
/// instrs into real instrs
FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
return new MipsExpandPseudo(tm);
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 22d1e47b1a2b..f8ea3d0321d2 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "MipsFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -84,55 +86,44 @@ using namespace llvm;
// if frame pointer elimination is disabled.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
- || MFI->isFrameAddressTaken();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
return true;
}
-static unsigned AlignOffset(unsigned Offset, unsigned Align) {
- return (Offset + Align - 1) / Align * Align;
-}
-
-// expand pair of register and immediate if the immediate doesn't fit in the
-// 16-bit offset field.
-// e.g.
-// if OrigImm = 0x10000, OrigReg = $sp:
-// generate the following sequence of instrs:
-// lui $at, hi(0x10000)
-// addu $at, $sp, $at
-//
-// (NewReg, NewImm) = ($at, lo(Ox10000))
-// return true
-static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
- unsigned& NewReg, int& NewImm,
- MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
- // OrigImm fits in the 16-bit field
- if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
- NewReg = OrigReg;
- NewImm = OrigImm;
- return false;
- }
+// Build an instruction sequence to load an immediate that is too large to fit
+// in 16 bits and add the result to Reg.
+static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64,
+ const MipsInstrInfo &TII, MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL) {
+ unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
+ unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu;
+ unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
+ MipsAnalyzeImmediate AnalyzeImm;
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ // The first instruction can be a LUi, which differs from the other
+ // instructions (ADDiu, ORi and SLL) in that it does not take a source
+ // register operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
- MachineFunction* MF = MBB.getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- DebugLoc DL = I->getDebugLoc();
- int ImmLo = (short)(OrigImm & 0xffff);
- int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
- ((OrigImm & 0x8000) != 0);
-
- // FIXME: change this when mips goes MC".
- BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
- BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
- BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
- .addReg(Mips::AT);
- NewReg = Mips::AT;
- NewImm = ImmLo;
+ // Build the remaining instructions in Seq.
+ for (++Inst; Inst != Seq.end(); ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
- return true;
+ BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
}
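A minimal sketch of the net effect of expandLargeImm for an adjustment whose low 16 bits happen to be clear, so a single LUi would suffice; the stack-pointer value and immediate are hypothetical and only the arithmetic is modeled, not the MachineInstr building.

#include <cassert>
#include <cstdint>

int main() {
  int64_t sp = 0x7fff0000;         // pretend stack pointer
  int64_t Imm = 0x20000;           // does not fit the 16-bit ADDiu field
  int64_t at = (Imm >> 16) << 16;  // lui  $at, 0x2
  sp += at;                        // addu $sp, $sp, $at
  assert(sp == 0x7fff0000LL + 0x20000);
  return 0;
}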
void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -146,29 +137,41 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- unsigned NewReg = 0;
- int NewImm = 0;
- bool ATUsed;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
// First, compute final stack size.
unsigned RegSize = STI.isGP32bit() ? 4 : 8;
unsigned StackAlign = getStackAlignment();
- unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
(MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
MipsFI->getMaxCallFrameSize();
- unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
- AlignOffset(MFI->getStackSize(), StackAlign);
+ uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) +
+ RoundUpToAlignment(MFI->getStackSize(), StackAlign);
// Update stack size
- MFI->setStackSize(StackSize);
-
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
-
- // TODO: check need from GP here.
- if (isPIC && STI.isABI_O32())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
- .addReg(RegInfo->getPICCallReg());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+ MFI->setStackSize(StackSize);
+
+ // Emit instructions that set the global base register if the target ABI is
+ // O32.
+ if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() &&
+ !MipsFI->globalBaseRegFixed()) {
+ // See MipsInstrInfo.td for explanation.
+ MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock();
+ MF.insert(&MBB, NewEntry);
+ NewEntry->addSuccessor(&MBB);
+
+ // Copy the live-in registers.
+ for (MachineBasicBlock::livein_iterator R = MBB.livein_begin();
+ R != MBB.livein_end(); ++R)
+ NewEntry->addLiveIn(*R);
+
+ BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips::SETGP01),
+ Mips::V0);
+ }
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
@@ -177,15 +180,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
MachineLocation DstML, SrcML;
- // Adjust stack : addi sp, sp, (-imm)
- ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
-
- // FIXME: change this when mips goes MC".
- if (ATUsed)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+ // Adjust stack.
+ if (isInt<16>(-StackSize)) // addiu sp, sp, (-stacksize)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
+ else { // Expand immediate that doesn't fit in 16-bit.
+ MipsFI->setEmitNOAT();
+ expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+ }
// emit ".cfi_def_cfa_offset StackSize"
MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
@@ -202,13 +203,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// register to the stack.
for (unsigned i = 0; i < CSI.size(); ++i)
++MBBI;
-
+
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl,
TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
-
+
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
@@ -217,7 +218,7 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// If Reg is a double precision register, emit two cfa_offsets,
// one for each of the paired single precision registers.
if (Mips::AFGR64RegisterClass->contains(Reg)) {
- const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ const uint16_t *SubRegs = RegInfo->getSubRegisters(Reg);
MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
MachineLocation SrcML0(*SubRegs);
@@ -236,19 +237,18 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
}
}
- }
+ }
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
- // Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
- .addReg(Mips::SP).addReg(Mips::ZERO);
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
- // emit ".cfi_def_cfa_register $fp"
+ // emit ".cfi_def_cfa_register $fp"
MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl,
TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(Mips::FP);
+ DstML = MachineLocation(FP);
SrcML = MachineLocation(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
}
@@ -256,12 +256,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// Restore GP from the saved stack location
if (MipsFI->needGPSaveRestore()) {
unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset);
-
- if (Offset >= 0x8000) {
- BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO));
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
- }
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset)
+ .addReg(Mips::GP);
}
}
@@ -272,59 +268,59 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
const MipsInstrInfo &TII =
*static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
-
- // Get the number of bytes from FrameInfo
- unsigned StackSize = MFI->getStackSize();
-
- unsigned NewReg = 0;
- int NewImm = 0;
- bool ATUsed = false;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
// Find the first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;
-
+
for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
--I;
// Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
- .addReg(Mips::FP).addReg(Mips::ZERO);
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
- // adjust stack : insert addi sp, sp, (imm)
- if (StackSize) {
- ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
- // FIXME: change this when mips goes MC".
- if (ATUsed)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
- }
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
+ else // Expand immediate that doesn't fit in 16-bit.
+ expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
}
void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo& MRI = MF.getRegInfo();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// FIXME: remove this code if register allocator can correctly mark
// $fp and $ra used or unused.
// Mark $fp and $ra as used or unused.
if (hasFP(MF))
- MRI.setPhysRegUsed(Mips::FP);
+ MRI.setPhysRegUsed(FP);
- // The register allocator might determine $ra is used after seeing
+ // The register allocator might determine $ra is used after seeing
// instruction "jr $ra", but we do not want PrologEpilogInserter to insert
// instructions to save/restore $ra unless there is a function call.
// To correct this, $ra is explicitly marked unused if there is no
// function call.
if (MF.getFrameInfo()->hasCalls())
MRI.setPhysRegUsed(Mips::RA);
- else
+ else {
MRI.setPhysRegUnused(Mips::RA);
+ MRI.setPhysRegUnused(Mips::RA_64);
+ }
}
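
An illustrative aside, not part of the patch: the reworked prologue/epilogue picks between a single ADDiu/DADDiu and an expanded sequence based on isInt<16>(-StackSize). The standalone sketch below shows the same fits-in-signed-16-bit check and the hi/lo split a conventional $at-based expansion relies on; fitsInSImm16, splitHiLo, and the exact lui/addiu/addu form are assumptions for the example, not the body of expandLargeImm.

#include <cstdint>
#include <cstdio>

// Equivalent of llvm::isInt<16>(x): can x be encoded as the signed 16-bit
// immediate of addiu/daddiu?
static bool fitsInSImm16(int64_t x) {
  return x >= -32768 && x <= 32767;
}

// Split a 32-bit offset into (hi, lo) such that offset == (hi << 16) + lo,
// where lo is the sign-extended low half consumed by addiu.
static void splitHiLo(int32_t offset, int32_t &hi, int32_t &lo) {
  lo = static_cast<int16_t>(offset & 0xffff);  // sign-extended low 16 bits
  hi = (offset - lo) / 65536;                  // exact; compensates lo's sign
}

int main() {
  int64_t stackSize = 0x12340;                 // hypothetical frame size
  if (fitsInSImm16(-stackSize)) {
    std::printf("addiu $sp, $sp, %lld\n", static_cast<long long>(-stackSize));
  } else {
    int32_t hi, lo;
    splitHiLo(static_cast<int32_t>(-stackSize), hi, lo);
    std::printf("lui   $at, 0x%x\n", static_cast<unsigned>(hi) & 0xffff);
    std::printf("addiu $at, $at, %d\n", lo);
    std::printf("addu  $sp, $sp, $at\n");
  }
}
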
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index c24975614c8d..bd1d89f04bc1 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -1,4 +1,4 @@
-//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9c831ede9dbf..f0651c61311b 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//===-- MipsISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,12 @@
#define DEBUG_TYPE "mips-isel"
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
@@ -28,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -63,6 +66,7 @@ public:
return "MIPS DAG->DAG Pattern Instruction Selection";
}
+ virtual bool runOnMachineFunction(MachineFunction &MF);
private:
// Include the pieces autogenerated from the target description.
@@ -81,17 +85,24 @@ private:
}
SDNode *getGlobalBaseReg();
+
+ std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
SDNode *Select(SDNode *N);
// Complex Pattern.
- bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
- // getI32Imm - Return a target constant with the specified
- // value, of type i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
+ void ProcessFunctionAfterISel(MachineFunction &MF);
+ bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+ void InitGlobalBaseReg(MachineFunction &MF);
+
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
@@ -99,20 +110,163 @@ private:
}
+// Insert instructions to initialize the global base register in the
+// first MBB of the function. When the ABI is O32 and the relocation model is
+// PIC, the necessary instructions are emitted later to prevent optimization
+// passes from moving them.
+void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ bool FixGlobalBaseReg = MipsFI->globalBaseRegFixed();
+
+ if (Subtarget.isABI_O32() && FixGlobalBaseReg)
+ // $gp is the global base register.
+ V0 = V1 = GlobalBaseReg;
+ else {
+ const TargetRegisterClass *RC;
+ RC = Subtarget.isABI_N64() ?
+ Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ }
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ } else if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ } else {
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ } else if (!MipsFI->globalBaseRegFixed()) {
+ assert(Subtarget.isABI_O32());
+
+ BuildMI(MBB, I, DL, TII.get(Mips::SETGP2), GlobalBaseReg)
+ .addReg(Mips::T9);
+ }
+ }
+}
+
+bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E; ++U) {
+ MachineOperand &MO = U.getOperand();
+ MachineInstr *MI = MO.getParent();
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
+ InitGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ ReplaceUsesWithZeroReg(MRI, *I);
+}
+
+bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
+
+ ProcessFunctionAfterISel(MF);
+
+ return Ret;
+}
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
-SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
EVT ValTy = Addr.getValueType();
- unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64;
+
+ // If Parent is an unaligned f32 load or store, select a (base + index)
+ // floating point load/store instruction (luxc1 or suxc1).
+ const LSBaseSDNode* LS = 0;
+
+ if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
+ EVT VT = LS->getMemoryVT();
+
+ if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
+ assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
+ "Unaligned loads/stores not supported for this type.");
+ if (VT == MVT::f32)
+ return false;
+ }
+ }
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
@@ -122,21 +276,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr.getOperand(0);
- return true;
- }
- } else {
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr;
- return true;
- }
}
// Addresses of the form FI+const or FI|const
@@ -166,17 +315,20 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
- Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
- Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
SDValue LoVal = Addr.getOperand(1);
- if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
Base = Addr.getOperand(0);
Offset = LoVal.getOperand(0);
return true;
}
}
+
+ // If an indexed floating point load/store can be emitted, return false.
+ if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasMips32r2Or64())
+ return false;
}
Base = Addr;
@@ -184,6 +336,28 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
return true;
}
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ Lo = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64, dl,
+ Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi)
+ Hi = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64, dl,
+ Ty, InFlag);
+
+ return std::make_pair(Lo, Hi);
+}
+
+
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
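
An illustrative aside, not part of the patch: the new SelectMULT helper emits a MULT/DMULT machine node and then reads the result back through glued MFLO/MFHI nodes, which is how SMUL_LOHI/UMUL_LOHI, MUL, and MULH* are selected below. The arithmetic those instructions perform is just a widening multiply split into low and high halves; here is that computation for the signed 32-bit case, with mult32 being a made-up name for the sketch.

#include <cstdint>
#include <cstdio>

// What "mult a, b" followed by "mflo"/"mfhi" yields for signed 32-bit
// operands: the full 64-bit product, returned as its low and high halves.
static void mult32(int32_t a, int32_t b, uint32_t &lo, int32_t &hi) {
  int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  lo = static_cast<uint32_t>(product);        // MFLO
  hi = static_cast<int32_t>(product >> 32);   // MFHI
}

int main() {
  uint32_t lo;
  int32_t hi;
  mult32(100000, 300000, lo, hi);             // product does not fit in 32 bits
  std::printf("hi=0x%08x lo=0x%08x\n", static_cast<uint32_t>(hi), lo);
}
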
@@ -203,123 +377,167 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated
// tablegen selection should be handled here.
///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
switch(Opcode) {
- default: break;
-
- case ISD::SUBE:
- case ISD::ADDE: {
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- MOp = Mips::SUBu;
- }
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
- EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
- SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
- SDValue(Carry,0), RHS);
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
+ LHS, SDValue(AddCarry,0));
+ }
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- assert(Node->getValueType(0) != MVT::i64 &&
- "64-bit multiplication with two results not handled.");
- SDValue Op1 = Node->getOperand(0);
- SDValue Op2 = Node->getOperand(1);
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
- unsigned Op;
- Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
+ true, true);
- SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
- SDValue InFlag = SDValue(Mul, 0);
- SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
- MVT::Glue, InFlag);
- InFlag = SDValue(Lo,1);
- SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
+ return NULL;
+ }
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
+ dl, NodeTy, true, false).first;
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
+ }
- return NULL;
- }
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
- /// Special Muls
- case ISD::MUL:
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32)
- break;
- case ISD::MULHS:
- case ISD::MULHU: {
- assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) &&
- "64-bit MULH* not handled.");
- EVT Ty = Node->getValueType(0);
- SDValue MulOp1 = Node->getOperand(0);
- SDValue MulOp2 = Node->getOperand(1);
-
- unsigned MulOp = (Opcode == ISD::MULHU ?
- Mips::MULTu :
- (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT));
- SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
- MVT::Glue, MulOp1, MulOp2);
-
- SDValue InFlag = SDValue(MulNode, 0);
-
- if (Opcode == ISD::MUL) {
- unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- return CurDAG->getMachineNode(Opc, dl, Ty, InFlag);
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO_64, MVT::i64);
+ return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
}
- else
- return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO, MVT::i32);
+ return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
+ Zero);
}
+ break;
+ }
- // Get target GOT address.
- case ISD::GLOBAL_OFFSET_TABLE:
- return getGlobalBaseReg();
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
+ if (Size == 32)
break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
}
- case MipsISD::ThreadPointer: {
- unsigned SrcReg = Mips::HWR29;
- unsigned DestReg = Mips::V1;
- SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
- Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
+ return RegOpnd;
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
}
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return ResNode.getNode();
+ }
}
// Select the default instruction
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 1932e745c593..6a23bc3d1d7c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -18,12 +18,13 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
-#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,27 +36,29 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-// If I is a shifted mask, set the size (Size) and the first bit of the
+// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
-// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_32(I);
- Pos = CountTrailingZeros_32(I);
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
return true;
}
+static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
+ MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
+ return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
+}
+
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::TlsGd: return "MipsISD::TlsGd";
- case MipsISD::TprelHi: return "MipsISD::TprelHi";
- case MipsISD::TprelLo: return "MipsISD::TprelLo";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
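
An illustrative aside, not part of the patch: IsShiftedMask now uses the 64-bit helpers (isShiftedMask_64, CountPopulation_64, CountTrailingZeros_64) so that the EXT/INS pattern matching below also works on i64 values. A portable standalone sketch of the same computation, using the example from the comment, 0x003ff800 -> (Pos, Size) = (11, 11); shiftedMaskPosSize is a made-up name for the sketch.

#include <cstdint>
#include <cstdio>

static bool shiftedMaskPosSize(uint64_t v, unsigned &pos, unsigned &size) {
  if (v == 0)
    return false;
  // A shifted mask is a single contiguous run of 1 bits: filling in the low
  // zeros must yield a value of the form 2^n - 1.
  uint64_t filled = v | (v - 1);
  if ((filled & (filled + 1)) != 0)
    return false;
  pos = 0;
  while (((v >> pos) & 1) == 0)              // CountTrailingZeros_64
    ++pos;
  size = 0;
  for (uint64_t t = v; t != 0; t &= t - 1)   // CountPopulation_64
    ++size;
  return true;
}

int main() {
  unsigned pos, size;
  if (shiftedMaskPosSize(0x003ff800, pos, size))
    std::printf("Pos=%u Size=%u\n", pos, size);   // prints Pos=11 Size=11
}
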
@@ -71,7 +74,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::DivRemU: return "MipsISD::DivRemU";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
@@ -84,7 +87,8 @@ MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
- HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) {
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
+ IsO32(Subtarget->isABI_O32()) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
@@ -93,17 +97,20 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Set up the register classes
addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
- addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
if (HasMips64)
addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
- else
- addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ else
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ }
}
// Load extented operations for i1 types must be promoted
@@ -123,7 +130,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
@@ -131,9 +137,30 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f64, Custom);
+ }
+
+ if (HasMips64) {
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ }
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -149,10 +176,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -165,8 +200,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -180,9 +213,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FEXP, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
@@ -192,11 +234,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setInsertFencesForAtomic(true);
@@ -208,32 +249,46 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
- if (!Subtarget->hasBitCount())
+ if (!Subtarget->hasBitCount()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+ }
- if (!Subtarget->hasSwap())
+ if (!Subtarget->hasSwap()) {
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ }
setTargetDAGCombine(ISD::ADDE);
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(HasMips64 ? 3 : 2);
- setStackPointerRegisterToSaveRestore(Mips::SP);
+ setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
computeRegisterProperties();
- setExceptionPointerRegister(Mips::A0);
- setExceptionSelectorRegister(Mips::A1);
+ setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
}
bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
- return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ case MVT::i16:
+ return true;
+ case MVT::f32:
+ return Subtarget->hasMips32r2Or64();
+ default:
+ return false;
+ }
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -290,8 +345,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// create MipsMAdd(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
- SDValue MAdd = CurDAG->getNode(MultOpc, dl,
- MVT::Glue,
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
ADDCNode->getOperand(1),// Lo0
@@ -364,8 +418,7 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
// create MipsSub(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
- SDValue MSub = CurDAG->getNode(MultOpc, dl,
- MVT::Glue,
+ SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
SUBCNode->getOperand(0),// Lo0
@@ -394,7 +447,8 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && SelectMadd(N, &DAG))
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ SelectMadd(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -406,7 +460,8 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && SelectMsub(N, &DAG))
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ SelectMsub(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -419,8 +474,8 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
EVT Ty = N->getValueType(0);
- unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
- unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
MipsISD::DivRemU;
DebugLoc dl = N->getDebugLoc();
@@ -481,11 +536,10 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
- if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
- return true;
+ assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
+ "Illegal Condition Code");
- assert(false && "Illegal Condition Code");
- return false;
+ return true;
}
// Creates and returns an FPCmp node from a setcc node.
@@ -522,21 +576,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+ SDValue SetCC = N->getOperand(0);
- if (Cond.getOpcode() != MipsISD::FPCmp)
+ if ((SetCC.getOpcode() != ISD::SETCC) ||
+ !SetCC.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
+ SDValue False = N->getOperand(2);
+ EVT FalseTy = False.getValueType();
+
+ if (!FalseTy.isInteger())
+ return SDValue();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+ if (!CN || CN->getZExtValue())
+ return SDValue();
- return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+ const DebugLoc DL = N->getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ SDValue True = N->getOperand(1);
+
+ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+
+ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
@@ -549,20 +619,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
-
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
// Op's first operand must be a shift right.
- if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
return SDValue();
// The second operand of the shift must be an immediate.
- uint64_t Pos;
ConstantSDNode *CN;
if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
return SDValue();
-
- Pos = CN->getZExtValue();
+ uint64_t Pos = CN->getZExtValue();
uint64_t SMPos, SMSize;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
@@ -570,21 +640,21 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shifted mask does not start at bit 0 or the sum of its size
// and Pos exceeds the word's size.
- if (SMPos != 0 || Pos + SMSize > 32)
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
- ShiftRight.getOperand(0),
- DAG.getConstant(Pos, MVT::i32),
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
+ ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32),
DAG.getConstant(SMSize, MVT::i32));
}
-
+
static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget* Subtarget) {
// Pattern match INS.
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
- // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
// => ins $dst, $src, size, pos, $src1
if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
return SDValue();
@@ -604,7 +674,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
// See if Op's second operand matches (and (shl $src, pos), mask1).
if (And1.getOpcode() != ISD::AND)
return SDValue();
-
+
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
!IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
@@ -623,17 +693,16 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
unsigned Shamt = CN->getZExtValue();
// Return if the shift amount and the first bit position of mask are not the
- // same.
- if (Shamt != SMPos0)
+ // same.
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
-
- return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
- Shl.getOperand(0),
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0),
DAG.getConstant(SMPos0, MVT::i32),
- DAG.getConstant(SMSize0, MVT::i32),
- And0.getOperand(0));
+ DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
-
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -648,8 +717,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::SDIVREM:
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
- case ISD::SETCC:
- return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -672,8 +741,10 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
@@ -689,7 +760,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// MachineFunction as a live in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -702,12 +773,13 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return Mips::BRANCH_T;
- if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
- return Mips::BRANCH_F;
+ assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
+ "Invalid CondCode.");
- return Mips::BRANCH_INVALID;
+ return Mips::BRANCH_F;
}
+/*
static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
DebugLoc dl,
const MipsSubtarget* Subtarget,
@@ -783,89 +855,115 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
+*/
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
-
switch (MI->getOpcode()) {
- default:
- assert(false && "Unexpected instr type to insert");
- return NULL;
- case Mips::MOVT:
- case Mips::MOVT_S:
- case Mips::MOVT_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1F);
- case Mips::MOVF:
- case Mips::MOVF_S:
- case Mips::MOVF_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1T);
- case Mips::MOVZ_I:
- case Mips::MOVZ_S:
- case Mips::MOVZ_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BNE);
- case Mips::MOVN_I:
- case Mips::MOVN_S:
- case Mips::MOVN_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BEQ);
-
+ default: llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
+ case Mips::ATOMIC_LOAD_ADD_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
+ case Mips::ATOMIC_LOAD_ADD_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
+ case Mips::ATOMIC_LOAD_ADD_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I64:
+ case Mips::ATOMIC_LOAD_ADD_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
+ case Mips::ATOMIC_LOAD_AND_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
+ case Mips::ATOMIC_LOAD_AND_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
+ case Mips::ATOMIC_LOAD_AND_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I64:
+ case Mips::ATOMIC_LOAD_AND_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
+ case Mips::ATOMIC_LOAD_OR_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
+ case Mips::ATOMIC_LOAD_OR_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
+ case Mips::ATOMIC_LOAD_OR_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I64:
+ case Mips::ATOMIC_LOAD_OR_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
+ case Mips::ATOMIC_LOAD_XOR_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
+ case Mips::ATOMIC_LOAD_XOR_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
+ case Mips::ATOMIC_LOAD_XOR_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I64:
+ case Mips::ATOMIC_LOAD_XOR_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
+ case Mips::ATOMIC_LOAD_NAND_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
+ case Mips::ATOMIC_LOAD_NAND_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
+ case Mips::ATOMIC_LOAD_NAND_I32_P8:
return EmitAtomicBinary(MI, BB, 4, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I64:
+ case Mips::ATOMIC_LOAD_NAND_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
+ case Mips::ATOMIC_LOAD_SUB_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
+ case Mips::ATOMIC_LOAD_SUB_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
+ case Mips::ATOMIC_LOAD_SUB_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I64:
+ case Mips::ATOMIC_LOAD_SUB_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
+ case Mips::ATOMIC_SWAP_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
+ case Mips::ATOMIC_SWAP_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
+ case Mips::ATOMIC_SWAP_I32_P8:
return EmitAtomicBinary(MI, BB, 4, 0);
+ case Mips::ATOMIC_SWAP_I64:
+ case Mips::ATOMIC_SWAP_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
+ case Mips::ATOMIC_CMP_SWAP_I8_P8:
return EmitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
+ case Mips::ATOMIC_CMP_SWAP_I16_P8:
return EmitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
+ case Mips::ATOMIC_CMP_SWAP_I32_P8:
return EmitAtomicCmpSwap(MI, BB, 4);
+ case Mips::ATOMIC_CMP_SWAP_I64:
+ case Mips::ATOMIC_CMP_SWAP_I64_P8:
+ return EmitAtomicCmpSwap(MI, BB, 8);
}
}
@@ -875,13 +973,31 @@ MachineBasicBlock *
MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
- assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL, SC, AND, NOR, ZERO, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ AND = Mips::AND;
+ NOR = Mips::NOR;
+ ZERO = Mips::ZERO;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ AND = Mips::AND64;
+ NOR = Mips::NOR64;
+ ZERO = Mips::ZERO_64;
+ BEQ = Mips::BEQ64;
+ }
unsigned OldVal = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -919,23 +1035,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc success, storeval, 0(ptr)
// beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
// and andres, oldval, incr
// nor storeval, $0, andres
- BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal)
- .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
} else if (BinOpcode) {
// <binop> storeval, oldval, incr
BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
- .addReg(StoreVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
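
An illustrative aside, not part of the patch: the EmitAtomicBinary changes parameterize the loop (LL/SC vs. LLD/SCD, 32- vs. 64-bit registers) but its shape is unchanged: load-linked, apply the binop, store-conditional, retry on failure. The same read-modify-write retry loop can be expressed with std::atomic for a host build, as a rough analogy only; atomicNand32 is a made-up name and this is not the generated code.

#include <atomic>
#include <cstdint>
#include <cstdio>

// Analogue of the expanded "atomic nand" pseudo: keep trying to replace *addr
// with ~(*addr & incr) until the store (here, compare_exchange_weak) succeeds.
// Returns the old value, like the pseudo's destination register.
static uint32_t atomicNand32(std::atomic<uint32_t> &addr, uint32_t incr) {
  uint32_t oldVal = addr.load();
  while (!addr.compare_exchange_weak(oldVal, ~(oldVal & incr))) {
    // compare_exchange_weak reloaded oldVal on failure; just retry.
  }
  return oldVal;
}

int main() {
  std::atomic<uint32_t> v(0xF0F0F0F0u);
  uint32_t old = atomicNand32(v, 0x0000FFFFu);
  std::printf("old=0x%08x new=0x%08x\n", old, v.load());
}
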
@@ -955,6 +1068,8 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -992,8 +1107,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(loopMBB);
@@ -1025,7 +1139,6 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
-
// atomic.load.binop
// loopMBB:
// ll oldval,0(alignedaddr)
@@ -1046,7 +1159,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
@@ -1064,12 +1177,12 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// and newval, incr2, mask
BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
-
+
BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
@@ -1100,13 +1213,29 @@ MachineBasicBlock *
MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
- assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL, SC, ZERO, BNE, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ ZERO = Mips::ZERO;
+ BNE = Mips::BNE;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ ZERO = Mips::ZERO_64;
+ BNE = Mips::BNE64;
+ BEQ = Mips::BEQ64;
+ }
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1128,8 +1257,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -1145,18 +1273,18 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BNE))
+ BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(BEQ))
+ .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1175,6 +1303,8 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1215,8 +1345,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(loop1MBB);
@@ -1265,7 +1394,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
@@ -1281,7 +1410,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
@@ -1313,6 +1442,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
{
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP;
assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
@@ -1324,20 +1454,19 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
// Get a reference from Mips stack pointer
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy());
// Subtract the dynamic size from the actual stack size to
// obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size);
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
- SDValue());
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue());
// This node always has two return values: a new stack pointer
// value and a chain
- SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other);
SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
@@ -1381,11 +1510,23 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
Op.getDebugLoc());
}
+SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = CreateFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == MipsISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
+
SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDVTList VTs = DAG.getVTList(MVT::i32);
@@ -1413,21 +1554,20 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
EVT ValTy = Op.getValueType();
bool HasGotOfst = (GV->hasInternalLinkage() ||
(GV->hasLocalLinkage() && !isa<Function>(GV)));
- unsigned GotFlag = IsN64 ?
+ unsigned GotFlag = HasMips64 ?
(HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- MipsII::MO_GOT;
+ (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
- SDValue ResNode = DAG.getLoad(ValTy, dl,
- DAG.getEntryNode(), GA, MachinePointerInfo(),
- false, false, 0);
+ GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), GA);
+ SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA,
+ MachinePointerInfo(), false, false, false, 0);
   // For functions and global targets that are not internally linked, only
   // a load from the GOT/GP is necessary for PIC to work.
if (!HasGotOfst)
return ResNode;
SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
- IsN64 ? MipsII::MO_GOT_OFST :
- MipsII::MO_ABS_LO);
+ HasMips64 ? MipsII::MO_GOT_OFST :
+ MipsII::MO_ABS_LO);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
}
@@ -1438,35 +1578,34 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
// %hi/%lo relocation
- SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_HI);
- SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_LO);
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_LO);
SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}
- SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_GOT);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
- SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_LO);
- SDValue Load = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), BAGOTOffset,
- MachinePointerInfo(), false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
+ BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy,
+ GetGlobalReg(DAG, ValTy), BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
+ SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
- // If the relocation model is PIC, use the General Dynamic TLS Model,
- // otherwise use the Initial Exec or Local Exec TLS Model.
- // TODO: implement Local Dynamic TLS model
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+  // Local Dynamic TLS Model; otherwise, use the Initial Exec or
+ // Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
DebugLoc dl = GA->getDebugLoc();
@@ -1475,45 +1614,63 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
- SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
- SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
- SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+ bool LocalDynamic = GV->hasInternalLinkage();
+    unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD;
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
+ GetGlobalReg(DAG, PtrVT), TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = PtrTy;
Args.push_back(Entry);
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(),
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
- dl);
- return CallResult.first;
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
+ TlsGetAddr, Args, DAG, dl);
+
+ SDValue Ret = CallResult.first;
+
+ if (!LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
}
SDValue Offset;
if (GV->isDeclaration()) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
+ TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ TGA);
+ Offset = DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
// Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
@@ -1523,34 +1680,30 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart, JTI, JTILo;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
-
EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
-
- if (!IsPIC) {
- SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ if (!IsPIC && !IsN64) {
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
} else {// Emit Load from Global Pointer
- JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(),
- false, false, 0);
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OfstFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
+ JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ JTI);
+ HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(), false, false, false, 0);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
}
- SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
- return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
}
SDValue MipsTargetLowering::
@@ -1572,7 +1725,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
N->getOffset(), MipsII::MO_ABS_HI);
SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
@@ -1581,16 +1734,19 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_GOT);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
- SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
- CP, MachinePointerInfo::getConstantPool(),
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), GOTFlag);
+ CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP);
+ SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(), false,
false, false, 0);
- SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), OFSTFlag);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
return ResNode;
@@ -1608,62 +1764,165 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
- MachinePointerInfo(SV),
- false, false, 0);
+ MachinePointerInfo(SV), false, false, 0);
}
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
- // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
- DebugLoc dl = Op.getDebugLoc();
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ EVT TyX = Op.getOperand(0).getValueType();
+ EVT TyY = Op.getOperand(1).getValueType();
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ SDValue Const31 = DAG.getConstant(31, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Res;
+
+  // If operand is of type f64, extract the upper 32 bits. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (TyX == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+ SDValue Y = (TyY == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
+ Const1);
+
+ if (HasR2) {
+ // ext E, Y, 31, 1 ; extract bit31 of Y
+ // ins X, E, 31, 1 ; insert extracted bit at bit31 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X);
+ } else {
+ // sll SllX, X, 1
+ // srl SrlX, SllX, 1
+ // srl SrlY, Y, 31
+    // sll SllY, SrlY, 31
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31);
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY);
+ }
+
+ if (TyX == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
- // FIXME:
- // Use ext/ins instructions if target architecture is Mips32r2.
- // Eliminate redundant mfc1 and mtc1 instructions.
- unsigned LoIdx = 0, HiIdx = 1;
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
+ unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
+ EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer nodes.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
+
+ if (HasR2) {
+ // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
+ // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32), Const1);
+
+ if (WidthX > WidthY)
+ E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
+ else if (WidthY > WidthX)
+ E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);
+
+ SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
+ DAG.getConstant(WidthX - 1, MVT::i32), Const1, X);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
+ }
- if (!isLittle)
- std::swap(LoIdx, HiIdx);
+ // (d)sll SllX, X, 1
+ // (d)srl SrlX, SllX, 1
+ // (d)srl SrlY, Y, width(Y)-1
+  // (d)sll SllY, SrlY, width(Y)-1
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32));
+
+ if (WidthX > WidthY)
+ SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
+ else if (WidthY > WidthX)
+ SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);
+
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
+ DAG.getConstant(WidthX - 1, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
+}
- DebugLoc dl = Op.getDebugLoc();
- SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getConstant(LoIdx, MVT::i32));
- SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
- SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
-
- if (!isLittle)
- std::swap(Word0, Word1);
-
- return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
-}
-
-SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
- const {
- EVT Ty = Op.getValueType();
+SDValue
+MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64())
+ return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
- assert(Ty == MVT::f32 || Ty == MVT::f64);
+ return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+}
- if (Ty == MVT::f32)
- return LowerFCOPYSIGN32(Op, DAG);
- else
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+  // If operand is of type f64, extract the upper 32 bits. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (Op.getValueType() == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
+ DAG.getRegister(Mips::ZERO, MVT::i32),
+ DAG.getConstant(31, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ }
+
+ if (Op.getValueType() == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer node.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
+ DAG.getRegister(Mips::ZERO_64, MVT::i64),
+ DAG.getConstant(63, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
+}
+
+SDValue
+MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
+ return LowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+
+ return LowerFABS32(Op, DAG, Subtarget->hasMips32r2());
}
SDValue MipsTargetLowering::
@@ -1676,13 +1935,14 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ IsN64 ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
// TODO: set SType according to the desired memory barrier behavior.
-SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
- SelectionDAG& DAG) const {
+SDValue
+MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
unsigned SType = 0;
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
@@ -1703,8 +1963,6 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "MipsGenCallingConv.inc"
-
//===----------------------------------------------------------------------===//
// TODO: Implement a generic logic using tblgen that can support this.
// Mips O32 ABI rules:
@@ -1726,13 +1984,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
static const unsigned IntRegsSize=4, FloatRegsSize=2;
- static const unsigned IntRegs[] = {
+ static const uint16_t IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
- static const unsigned F32Regs[] = {
+ static const uint16_t F32Regs[] = {
Mips::F12, Mips::F14
};
- static const unsigned F64Regs[] = {
+ static const uint16_t F64Regs[] = {
Mips::D6, Mips::D7
};
@@ -1811,13 +2069,77 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false; // CC must always match
}
+static const uint16_t Mips64IntRegs[8] =
+ {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+ Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
+static const uint16_t Mips64DPRegs[8] =
+ {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
+ Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64};
+
+static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8);
+ unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8;
+ unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8);
+
+ assert(Align <= 16 && "Cannot handle alignments larger than 16.");
+
+ // If byval is 16-byte aligned, the first arg register must be even.
+ if ((Align == 16) && (FirstIdx % 2)) {
+ State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]);
+ ++FirstIdx;
+ }
+
+ // Mark the registers allocated.
+ for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I)
+ State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]);
+
+ // Allocate space on caller's stack.
+ unsigned Offset = State.AllocateStack(Size, Align);
+
+ if (FirstIdx < 8)
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx],
+ LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+ return true;
+}
+
+#include "MipsGenCallingConv.inc"
+
+static void
+AnalyzeMips64CallOperands(CCState &CCInfo,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool R;
+
+ if (Outs[i].IsFixed)
+ R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else
+ R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+
+ if (R) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
static const unsigned O32IntRegsSize = 4;
-static const unsigned O32IntRegs[] = {
+static const uint16_t O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -1848,9 +2170,8 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
- MachinePointerInfo(),
- false, false, std::min(ByValAlign,
- (unsigned )4));
+ MachinePointerInfo(), false, false, false,
+ std::min(ByValAlign, (unsigned )4));
MemOpChains.push_back(LoadVal.getValue(1));
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
@@ -1886,7 +2207,7 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
// Read second subword if necessary.
if (RemainingSize != 0) {
assert(RemainingSize == 1 && "There must be one byte remaining.");
- LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
@@ -1919,13 +2240,101 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
MachinePointerInfo(0), MachinePointerInfo(0));
}
+// Copy Mips64 byVal arg to registers and stack.
+void static
+PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
+ SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ EVT PtrTy, bool isLittle) {
+ unsigned ByValSize = Flags.getByValSize();
+ unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8);
+ bool IsRegLoc = VA.isRegLoc();
+ unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
+ unsigned LocMemOffset = 0;
+ unsigned MemCpySize = ByValSize;
+
+ if (!IsRegLoc)
+ LocMemOffset = VA.getLocMemOffset();
+ else {
+ const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
+ VA.getLocReg());
+ const uint16_t *RegEnd = Mips64IntRegs + 8;
+
+ // Copy double words to registers.
+ for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr,
+ MachinePointerInfo(), false, false, false,
+ Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+ RegsToPass.push_back(std::make_pair(*Reg, LoadVal));
+ }
+
+ // Return if the struct has been fully copied.
+ if (!(MemCpySize = ByValSize - Offset))
+ return;
+
+ // If there is an argument register available, copy the remainder of the
+ // byval argument with sub-doubleword loads and shifts.
+ if (Reg != RegEnd) {
+ assert((ByValSize < Offset + 8) &&
+ "Size of the remainder should be smaller than 8-byte.");
+ SDValue Val;
+ for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) {
+ unsigned RemSize = ByValSize - Offset;
+
+ if (RemSize < LoadSize)
+ continue;
+
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal =
+ DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr,
+ MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
+ false, false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // Offset in number of bits from double word boundary.
+ unsigned OffsetDW = (Offset % 8) * 8;
+ unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8);
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal,
+ DAG.getConstant(Shamt, MVT::i32));
+
+ Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) :
+ Shift;
+ Offset += LoadSize;
+ Alignment = std::min(Alignment, LoadSize);
+ }
+
+ RegsToPass.push_back(std::make_pair(*Reg, Val));
+ return;
+ }
+ }
+
+ assert(MemCpySize && "MemCpySize must not be zero.");
+
+ // Create a fixed object on stack at offset LocMemOffset and copy
+ // remainder of byval arg to it with memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+}
+
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isTailCall.
SDValue
MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1943,10 +2352,12 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
- if (Subtarget->isABI_O32())
+ if (IsO32)
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
+ else if (HasMips64)
+ AnalyzeMips64CallOperands(CCInfo, Outs);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
@@ -1963,7 +2374,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// If this is the first call, create a stack frame object that points to
// a location to which .cprestore saves $gp.
- if (IsPIC && !MipsFI->getGPFI())
+ if (IsO32 && IsPIC && MipsFI->globalBaseRegFixed() && !MipsFI->getGPFI())
MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
// Get the frame index of the stack frame object that points to the location
@@ -1973,7 +2384,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
- if (Subtarget->isABI_O32())
+ if (IsO32)
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
@@ -1988,7 +2399,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
NextStackOffset = (NextStackOffset + StackAlignment - 1) /
StackAlignment * StackAlignment;
- if (IsPIC)
+ if (MipsFI->needGPSaveRestore())
MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
MFI->setObjectOffset(DynAllocFI, NextStackOffset);
@@ -2004,22 +2415,40 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue Arg = OutVals[i];
CCValAssign &VA = ArgLocs[i];
+ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // ByVal Arg.
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ if (IsO32)
+ WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ MFI, DAG, Arg, VA, Flags, getPointerTy(),
+ Subtarget->isLittle());
+ else
+ PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ MFI, DAG, Arg, VA, Flags, getPointerTy(),
+ Subtarget->isLittle());
+ continue;
+ }
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
- if (Subtarget->isABI_O32() && VA.isRegLoc()) {
- if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
- if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ if (VA.isRegLoc()) {
+ if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
+ (ValVT == MVT::f64 && LocVT == MVT::i64))
+ Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg);
+ else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
Arg, DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegLo = VA.getLocReg();
unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
@@ -2028,13 +2457,13 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
}
break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg);
break;
}
@@ -2048,28 +2477,15 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Register can't get to this point...
assert(VA.isMemLoc());
- // ByVal Arg.
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- if (Flags.isByVal()) {
- assert(Subtarget->isABI_O32() &&
- "No support for ByVal args by ABIs other than O32 yet.");
- assert(Flags.getByValSize() &&
- "ByVal args of size 0 should have been ignored by front-end.");
- WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI,
- DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle());
- continue;
- }
-
// Create the frame index object for this incoming parameter
- LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), true);
SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
     // emit an ISD::STORE which stores the
     // parameter value to a stack location
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
+ MachinePointerInfo(), false, false, 0));
}
// Extend range of indices of frame objects for outgoing arguments that were
@@ -2093,52 +2509,68 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- bool LoadSymAddr = false;
+ unsigned char OpFlag;
+ bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
+ bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0,MipsII:: MO_GOT);
+ if (IsPICCall && G->getGlobal()->hasInternalLinkage()) {
+ OpFlag = IsO32 ? MipsII::MO_GOT : MipsII::MO_GOT_PAGE;
+ unsigned char LoFlag = IsO32 ? MipsII::MO_ABS_LO : MipsII::MO_GOT_OFST;
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
+ OpFlag);
CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
- 0, MipsII::MO_ABS_LO);
+ 0, LoFlag);
} else {
+ OpFlag = IsPICCall ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
}
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
- getPointerTy(), OpFlag);
- LoadSymAddr = true;
+ if (IsN64 || (!IsO32 && IsPIC))
+ OpFlag = MipsII::MO_GOT_DISP;
+ else if (!IsPIC) // !N64 && static
+ OpFlag = MipsII::MO_NO_FLAG;
+ else // O32 & PIC
+ OpFlag = MipsII::MO_GOT_CALL;
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlag);
+ GlobalOrExternal = true;
}
SDValue InFlag;
// Create nodes that load address of callee and copy it to T9
- if (IsPIC) {
- if (LoadSymAddr) {
+ if (IsPICCall) {
+ if (GlobalOrExternal) {
// Load callee address
- Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
- SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(),
- false, false, 0);
+ Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(),
+ GetGlobalReg(DAG, getPointerTy()), Callee);
+ SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ Callee, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
// Use GOT+LO if callee has internal linkage.
if (CalleeLo.getNode()) {
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
- Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, getPointerTy(), CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo);
} else
Callee = LoadValue;
}
+ }
+ // T9 should contain the address of the callee function if
+  // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
// copy to T9
- Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
InFlag = Chain.getValue(1);
- Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ Callee = DAG.getRegister(T9Reg, getPointerTy());
}
// Build a sequence of copy-to-reg nodes chained together with token
@@ -2166,6 +2598,12 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2216,7 +2654,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
std::vector<SDValue>& OutChains,
SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) {
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const Argument *FuncArg) {
unsigned LocMem = VA.getLocMemOffset();
unsigned FirstWord = LocMem / 4;
@@ -2231,20 +2670,58 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
DAG.getConstant(i * 4, MVT::i32));
SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
- StorePtr, MachinePointerInfo(), false,
- false, 0);
+ StorePtr, MachinePointerInfo(FuncArg, i * 4),
+ false, false, 0);
OutChains.push_back(Store);
}
}
+// Create frame object on stack and copy registers used for byval passing to it.
+static unsigned
+CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
+ std::vector<SDValue>& OutChains, SelectionDAG &DAG,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ MachineFrameInfo *MFI, bool IsRegLoc,
+ SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
+ EVT PtrTy, const Argument *FuncArg) {
+ const uint16_t *Reg = Mips64IntRegs + 8;
+ int FOOffset; // Frame object offset from virtual frame pointer.
+
+ if (IsRegLoc) {
+ Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg());
+ FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8;
+ }
+ else
+ FOOffset = VA.getLocMemOffset();
+
+ // Create frame object.
+ unsigned NumRegs = (Flags.getByValSize() + 7) / 8;
+ unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy);
+ InVals.push_back(FIN);
+
+ // Copy arg registers.
+ for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs);
+ ++Reg, ++I) {
+ unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass);
+ SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN,
+ DAG.getConstant(I * 8, PtrTy));
+ SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64),
+ StorePtr, MachinePointerInfo(FuncArg, I * 8),
+ false, false, 0);
+ OutChains.push_back(Store);
+ }
+
+ return LastFI;
+}
+
/// LowerFormalArguments - transform physical registers into virtual registers
/// and generate load operations for arguments placed on the stack.
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
@@ -2260,23 +2737,46 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
- if (Subtarget->isABI_O32())
+ if (IsO32)
CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) {
CCValAssign &VA = ArgLocs[i];
+ EVT ValVT = VA.getValVT();
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool IsRegLoc = VA.isRegLoc();
+
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ if (IsO32) {
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
+ true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(FIN);
+ ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags,
+ &*FuncArg);
+ } else // N32/64
+ LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags,
+ MFI, IsRegLoc, InVals, MipsFI,
+ getPointerTy(), &*FuncArg);
+ continue;
+ }
// Arguments stored on registers
- if (VA.isRegLoc()) {
+ if (IsRegLoc) {
EVT RegVT = VA.getLocVT();
unsigned ArgReg = VA.getLocReg();
- TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = Mips::CPURegsRegisterClass;
@@ -2305,23 +2805,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
Opcode = ISD::AssertZext;
if (Opcode)
ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ DAG.getValueType(ValVT));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
- // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
- if (Subtarget->isABI_O32()) {
- if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
- ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
- if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
- getNextIntArgReg(ArgReg), RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
- if (!Subtarget->isLittle())
- std::swap(ArgValue, ArgValue2);
- ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
- ArgValue, ArgValue2);
- }
+ // Handle floating point arguments passed in integer registers.
+ if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
+ (RegVT == MVT::i64 && ValVT == MVT::f64))
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue);
+ else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ getNextIntArgReg(ArgReg), RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
}
InVals.push_back(ArgValue);
@@ -2330,32 +2829,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// sanity check
assert(VA.isMemLoc());
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
-
- if (Flags.isByVal()) {
- assert(Subtarget->isABI_O32() &&
- "No support for ByVal args by ABIs other than O32 yet.");
- assert(Flags.getByValSize() &&
- "ByVal args of size 0 should have been ignored by front-end.");
- unsigned NumWords = (Flags.getByValSize() + 3) / 4;
- LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
- true);
- SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
- InVals.push_back(FIN);
- ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags);
-
- continue;
- }
-
// The stack pointer offset is relative to the caller stack frame.
- LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(LastFI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -2372,28 +2854,43 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
- if (isVarArg && Subtarget->isABI_O32()) {
+ if (isVarArg) {
+ unsigned NumOfRegs = IsO32 ? 4 : 8;
+ const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
+    int FirstRegSlotOffset = IsO32 ? 0 : -64; // offset of $a0's slot.
+ const TargetRegisterClass *RC
+ = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass;
+ unsigned RegSize = RC->getSize();
+ int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize;
+
+ // Offset of the first variable argument from stack pointer.
+ int FirstVaArgOffset;
+
+ if (IsO32 || (Idx == NumOfRegs)) {
+ FirstVaArgOffset =
+ (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize;
+ } else
+ FirstVaArgOffset = RegSlotOffset;
+
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- unsigned NextStackOffset = CCInfo.getNextStackOffset();
- assert(NextStackOffset % 4 == 0 &&
- "NextStackOffset must be aligned to 4-byte boundaries.");
- LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true);
MipsFI->setVarArgsFrameIndex(LastFI);
- // If NextStackOffset is smaller than o32's 16-byte reserved argument area,
- // copy the integer registers that have not been used for argument passing
- // to the caller's stack frame.
- for (; NextStackOffset < 16; NextStackOffset += 4) {
- TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
- unsigned Idx = NextStackOffset / 4;
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
- LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ // Copy the integer registers that have not been used for argument passing
+ // to the argument register save area. For O32, the save area is allocated
+ // in the caller's stack frame, while for N32/64, it is allocated in the
+ // callee's stack frame.
+ for (int StackOffset = RegSlotOffset;
+ Idx < NumOfRegs; ++Idx, StackOffset += RegSize) {
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ MVT::getIntegerVT(RegSize * 8));
+ LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true);
SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
+ MachinePointerInfo(), false, false, 0));
}
}
@@ -2447,8 +2944,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
     // Guarantee that all emitted copies are stuck together (glued),
     // so that nothing can be scheduled in between them.
@@ -2505,7 +3001,6 @@ getConstraintType(const std::string &Constraint) const
case 'y':
case 'f':
return C_RegisterClass;
- break;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -2553,14 +3048,19 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
- return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ if (VT == MVT::i32)
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ assert(VT == MVT::i64 && "Unexpected type.");
+ return std::make_pair(0U, Mips::CPU64RegsRegisterClass);
case 'f':
if (VT == MVT::f32)
return std::make_pair(0U, Mips::FGR32RegisterClass);
- if (VT == MVT::f64)
- if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
+ if (Subtarget->isFP64bit())
+ return std::make_pair(0U, Mips::FGR64RegisterClass);
+ else
return std::make_pair(0U, Mips::AFGR64RegisterClass);
- break;
+ }
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -2579,3 +3079,10 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
return Imm.isZero();
}
+
+unsigned MipsTargetLowering::getJumpTableEncoding() const {
+ if (IsN64)
+ return MachineJumpTableInfo::EK_GPRel64BlockAddress;
+
+ return TargetLowering::getJumpTableEncoding();
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4be3fed59fc0..c36f40f639f3 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -15,10 +15,10 @@
#ifndef MipsISELLOWERING_H
#define MipsISELLOWERING_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace MipsISD {
@@ -40,13 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // General Dynamic TLS
- TlsGd,
-
- // Local Exec TLS
- TprelHi,
- TprelLo,
-
// Thread Pointer
ThreadPointer,
@@ -79,7 +72,7 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC,
+ Wrapper,
DynAlloc,
@@ -98,6 +91,8 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -114,8 +109,8 @@ namespace llvm {
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
-
- bool HasMips64, IsN64;
+
+ bool HasMips64, IsN64, IsO32;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -133,8 +128,10 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
@@ -149,7 +146,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -186,6 +183,8 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual unsigned getJumpTableEncoding() const;
+
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 1fb779d6bec1..b6559452fecf 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
+//===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -59,6 +59,15 @@ def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//
@@ -74,19 +83,35 @@ def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
//===----------------------------------------------------------------------===//
// FP load.
-class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
- Operand MemOpnd>:
+class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
FMem<op, (outs RC:$ft), (ins MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (FOp addr:$addr))],
+ !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))],
IILoad>;
// FP store.
-class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
- Operand MemOpnd>:
+class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
+ !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)],
IIStore>;
+// FP indexed load.
+class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
+ RegisterClass PRC, PatFrag FOp>:
+ FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, $index($base)"),
+ [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
+ let fs = 0;
+}
+
+// FP indexed store.
+class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
+ RegisterClass PRC, PatFrag FOp>:
+ FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, $index($base)"),
+ [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
+ let fd = 0;
+}
+
// Instructions that convert an FP value to 32-bit fixed point.
multiclass FFR1_W_M<bits<6> funct, string opstr> {
def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
@@ -122,6 +147,19 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
}
}
+// FP madd/msub/nmadd/nmsub instruction classes.
+class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
+ SDNode OpNode, RegisterClass RC> :
+ FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>;
+
+class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
+ SDNode OpNode, RegisterClass RC> :
+ FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>;
+
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
@@ -152,8 +190,10 @@ let Predicates = [IsFP64bit] in {
def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
}
-defm FABS : FFR1P_M<0x5, "abs", fabs>;
-defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+let Predicates = [NoNaNsFPMath] in {
+ defm FABS : FFR1P_M<0x5, "abs", fabs>;
+ defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+}
defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
// The odd-numbered registers are only referenced when doing loads,
@@ -183,6 +223,14 @@ def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
"mtc1\t$rt, $fs",
[(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
+def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs),
+ "dmfc1\t$rt, $fs",
+ [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>;
+
+def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
+ "dmtc1\t$rt, $fs",
+ [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>;
+
def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
Requires<[NotFP64bit]>;
@@ -191,23 +239,53 @@ def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
/// Floating Point Memory Instructions
let Predicates = [IsN64] in {
- def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>;
- def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>;
- def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>;
- def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>;
+ def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
+ def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
+ def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64>;
+ def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64>;
}
let Predicates = [NotN64] in {
- def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>;
- def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>;
- let Predicates = [HasMips64] in {
- def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>;
- def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>;
- }
- let Predicates = [NotMips64] in {
- def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>;
- def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>;
- }
+ def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
+ def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
+}
+
+let Predicates = [NotN64, HasMips64] in {
+ def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
+ def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
+}
+
+let Predicates = [NotN64, NotMips64] in {
+ def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
+ def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
+}
+
+// Indexed loads and stores.
+let Predicates = [HasMips32r2Or64] in {
+ def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
+ def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
+ def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
+ def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
+}
+
+let Predicates = [HasMips32r2, NotMips64] in {
+ def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>;
+ def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>;
+}
+
+let Predicates = [HasMips64, NotN64] in {
+ def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>;
+ def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>;
+}
+
+// n64
+let Predicates = [IsN64] in {
+ def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
+ def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
+ def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
+ def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
+ def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
+ def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
}
/// Floating-point Arithmetic
@@ -216,6 +294,36 @@ defm FDIV : FFR2P_M<0x03, "div", fdiv>;
defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+let Predicates = [HasMips32r2] in {
+ def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
+ def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
+}
+
+let Predicates = [HasMips32r2, NoNaNsFPMath] in {
+ def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
+ def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit] in {
+ def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
+ def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in {
+ def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
+ def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit] in {
+ def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
+ def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath] in {
+ def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
+ def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
+}
+
//===----------------------------------------------------------------------===//
// Floating Point Branch Codes
//===----------------------------------------------------------------------===//
@@ -259,71 +367,16 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
+ FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc),
+ !strconcat("c.$cc.", typestr, "\t$fs, $ft"),
+ [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>;
+
/// Floating Point Compare
let Defs=[FCR31] in {
- def FCMP_S32 : FCC<0x10, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
- "c.$cc.s\t$fs, $ft",
- [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
-
- def FCMP_D32 : FCC<0x11, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
- "c.$cc.d\t$fs, $ft",
- [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
- Requires<[NotFP64bit]>;
-}
-
-
-// Conditional moves:
-// These instructions are expanded in
-// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
-// conditional move instructions.
-// flag:int, data:float
-let usesCustomInserter = 1, Constraints = "$F = $dst" in
-class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
- string instr_asm> :
- FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
- !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
-
-def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
-def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
-
-let Predicates = [NotFP64bit] in {
- def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
- def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
-}
-
-defm : MovzPats<FGR32, MOVZ_S>;
-defm : MovnPats<FGR32, MOVN_S>;
-
-let Predicates = [NotFP64bit] in {
- defm : MovzPats<AFGR64, MOVZ_D>;
- defm : MovnPats<AFGR64, MOVN_D>;
-}
-
-let cc = 0, usesCustomInserter = 1, Uses = [FCR31],
- Constraints = "$F = $dst" in {
-// flag:float, data:int
-class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
- FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
- !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
- [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
-
-// flag:float, data:float
-let cc = 0 in
-class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
- string instr_asm> :
- FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
- !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
- [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
-}
-
-def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
-def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
-def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
-def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-
-let Predicates = [NotFP64bit] in {
- def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def FCMP_S32 : FCMP<0x10, FGR32, "s">;
+ def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>;
+ def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]>;
}
//===----------------------------------------------------------------------===//
@@ -352,25 +405,46 @@ def ExtractElementF64 :
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-def fpimm0neg : PatLeaf<(fpimm), [{
- return N->isExactlyValue(-0.0);
-}]>;
-
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
-def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
-
def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
-def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
let Predicates = [NotFP64bit] in {
+ def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
+ def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
+let Predicates = [IsFP64bit] in {
+ def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>;
+ def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
+
+ def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>;
+ def : Pat<(f32 (sint_to_fp CPU64Regs:$src)),
+ (CVT_S_L (DMTC1 CPU64Regs:$src))>;
+ def : Pat<(f64 (sint_to_fp CPU64Regs:$src)),
+ (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
+
+ def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>;
+ def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
+ def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
+
+ def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
+}
+
+// Patterns for unaligned floating point loads and stores.
+let Predicates = [HasMips32r2Or64, NotN64] in {
+ def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
+ def : Pat<(store_u FGR32:$src, CPURegs:$addr),
+ (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
+}
+
+let Predicates = [IsN64] in {
+ def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
+ def : Pat<(store_u FGR32:$src, CPU64Regs:$addr),
+ (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
+}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e1725fa867f0..455530389eba 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
+//===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
-class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
{
@@ -290,3 +290,40 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
!strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
[(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
+
+// Floating point madd/msub/nmadd/nmsub.
+class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+ bits<5> fd;
+ bits<5> fr;
+ bits<5> fs;
+ bits<5> ft;
+
+ let Opcode = 0x13;
+ let Inst{25-21} = fr;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
+}
+
+// FP indexed load/store instructions.
+class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+{
+ bits<5> base;
+ bits<5> index;
+ bits<5> fs;
+ bits<5> fd;
+
+ let Opcode = 0x13;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
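The FFMADDSUB and FFMemIdx classes above pin down the COP1X bit layout in TableGen. As a quick cross-check, here is a minimal stand-alone sketch (not part of the patch; the register numbers and expected word are illustrative) that packs the same fields:

#include <cassert>
#include <cstdint>

// Pack the FFMADDSUB fields exactly as the class above lays them out:
// major opcode 0x13 (COP1X), then fr/ft/fs/fd, a 3-bit funct and a 3-bit fmt.
static uint32_t encodeFFMADDSUB(unsigned funct, unsigned fmt, unsigned fd,
                                unsigned fr, unsigned fs, unsigned ft) {
  uint32_t Inst = 0x13u << 26;
  Inst |= (fr & 31u) << 21;
  Inst |= (ft & 31u) << 16;
  Inst |= (fs & 31u) << 11;
  Inst |= (fd & 31u) << 6;
  Inst |= (funct & 7u) << 3;
  Inst |= fmt & 7u;
  return Inst;
}

int main() {
  // madd.s $f0, $f1, $f2, $f3: funct = 0x4, fmt = 0 (single precision).
  assert(encodeFFMADDSUB(0x4, 0, /*fd=*/0, /*fr=*/1, /*fs=*/2, /*ft=*/3)
         == 0x4C231020u);
  return 0;
}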
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 559943a8dbae..a3a18bff6553 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//===-- MipsInstrInfo.cpp - Mips Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,10 +29,10 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this) {}
+ RI(*TM.getSubtargetImpl(), *this),
+ UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
-
-const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
return RI;
}
@@ -131,6 +131,8 @@ copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::FMOV_S;
else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
Opc = Mips::MOVCCRToCCR;
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
@@ -140,18 +142,22 @@ copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::MFHI64, SrcReg = 0;
else if (SrcReg == Mips::LO64)
Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
}
else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
if (DestReg == Mips::HI64)
Opc = Mips::MTHI64, DestReg = 0;
else if (DestReg == Mips::LO64)
Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
}
assert(Opc && "Cannot copy registers");
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
-
+
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
@@ -162,6 +168,16 @@ copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
+static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag,
+ MFI.getObjectSize(FI), Align);
+}
+
void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
@@ -169,6 +185,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
@@ -184,7 +202,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
}
void MipsInstrInfo::
@@ -195,6 +213,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
@@ -209,7 +228,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
}
MachineInstr*
@@ -230,7 +250,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
Opc : 0;
}
@@ -239,21 +260,21 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
{
switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ : return Mips::BNE;
- case Mips::BNE : return Mips::BEQ;
- case Mips::BGTZ : return Mips::BLEZ;
- case Mips::BGEZ : return Mips::BLTZ;
- case Mips::BLTZ : return Mips::BGEZ;
- case Mips::BLEZ : return Mips::BGTZ;
- case Mips::BEQ64 : return Mips::BNE64;
- case Mips::BNE64 : return Mips::BEQ64;
- case Mips::BGTZ64 : return Mips::BLEZ64;
- case Mips::BGEZ64 : return Mips::BLTZ64;
- case Mips::BLTZ64 : return Mips::BGEZ64;
- case Mips::BLEZ64 : return Mips::BGTZ64;
- case Mips::BC1T : return Mips::BC1F;
- case Mips::BC1F : return Mips::BC1T;
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
}
}
@@ -262,7 +283,7 @@ static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
SmallVectorImpl<MachineOperand>& Cond) {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
-
+
// for both int and fp branches, the last explicit operand is the
// MBB.
BB = Inst->getOperand(NumOp-1).getMBB();
@@ -314,7 +335,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
// Unconditional branch
- if (LastOpc == Mips::J) {
+ if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -331,7 +352,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If second to last instruction is an unconditional branch,
// analyze it and remove the last instruction.
- if (SecondLastOpc == Mips::J) {
+ if (SecondLastOpc == UncondBrOpc) {
// Return if the last instruction cannot be removed.
if (!AllowModify)
return true;
@@ -343,15 +364,15 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Conditional branch followed by an unconditional branch.
// The last one must be unconditional.
- if (LastOpc != Mips::J)
+ if (LastOpc != UncondBrOpc)
return true;
AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
FBB = LastInst->getOperand(0).getMBB();
return false;
-}
-
+}
+
void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
MachineBasicBlock *TBB, DebugLoc DL,
const SmallVectorImpl<MachineOperand>& Cond)
@@ -385,14 +406,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way Conditional branch.
if (FBB) {
BuildCondBr(MBB, TBB, DL, Cond);
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
return 2;
}
// One way branch.
// Unconditional branch.
if (Cond.empty())
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
else // Conditional branch.
BuildCondBr(MBB, TBB, DL, Cond);
return 1;
@@ -433,27 +454,3 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
-/// getGlobalBaseReg - Return a virtual register initialized with the
-/// the global base register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
- return GlobalBaseReg;
-
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalBaseReg).addReg(Mips::GP);
- RegInfo.addLiveIn(Mips::GP);
-
- MipsFI->setGlobalBaseReg(GlobalBaseReg);
- return GlobalBaseReg;
-}
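With the unconditional branch opcode now chosen from the relocation model (B under PIC, J under static), AnalyzeBranch compares terminators against UncondBrOpc instead of hard-coding Mips::J. A minimal sketch of that terminator classification, using simplified assumed types rather than LLVM's real MachineInstr API:

#include <vector>

struct Term { unsigned Opc; };   // hypothetical stand-in for a terminator

enum Shape { FallThrough, UncondOnly, CondOnly, CondPlusUncond, Unknown };

// Classify the last one or two terminators of a block, mirroring the logic
// in the hunks above: a lone UncondBrOpc is an unconditional branch, and a
// conditional branch may only be followed by the unconditional opcode.
static Shape classify(const std::vector<Term> &Terms, unsigned UncondBrOpc) {
  if (Terms.empty())
    return FallThrough;
  const Term &Last = Terms.back();
  if (Terms.size() == 1)
    return Last.Opc == UncondBrOpc ? UncondOnly : CondOnly;
  const Term &SecondLast = Terms[Terms.size() - 2];
  if (SecondLast.Opc != UncondBrOpc && Last.Opc == UncondBrOpc)
    return CondPlusUncond;
  return Unknown;
}

int main() {
  const unsigned B = 0x04, BEQ = 0x10;   // assumed opcode values, for illustration
  return classify({{BEQ}, {B}}, /*UncondBrOpc=*/B) == CondPlusUncond ? 0 : 1;
}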
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 271d2487ecfa..4be727dd8994 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//===-- MipsInstrInfo.h - Mips Instruction Information ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,9 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MipsRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MipsGenInstrInfo.inc"
@@ -30,90 +30,11 @@ namespace Mips {
unsigned GetOppositeBranchOpc(unsigned Opc);
}
-/// MipsII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MipsII {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // Mips Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_GOT - Represents the offset into the global offset table at which
- /// the address the relocation entry symbol resides during execution.
- MO_GOT,
-
- /// MO_GOT_CALL - Represents the offset into the global offset table at
- /// which the address of a call site relocation entry symbol resides
- /// during execution. This is different from the above since this flag
- /// can only be present in call instructions.
- MO_GOT_CALL,
-
- /// MO_GPREL - Represents the offset from the current gp value to be used
- /// for the relocatable object file being produced.
- MO_GPREL,
-
- /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HI,
- MO_ABS_LO,
-
- /// MO_TLSGD - Represents the offset into the global offset table at which
- // the module ID and TSL block offset reside during execution (General
- // Dynamic TLS).
- MO_TLSGD,
-
- /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
- // Exec TLS).
- MO_GOTTPREL,
-
- /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
- // the thread pointer (Local Exec TLS).
- MO_TPREL_HI,
- MO_TPREL_LO,
-
- // N32/64 Flags.
- MO_GPOFF_HI,
- MO_GPOFF_LO,
- MO_GOT_DISP,
- MO_GOT_PAGE,
- MO_GOT_OFST
- };
-
- enum {
- //===------------------------------------------------------------------===//
- // Instruction encodings. These are the standard/most common forms for
- // Mips instructions.
- //
-
- // Pseudo - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- Pseudo = 0,
-
- /// FrmR - This form is for instructions of the format R.
- FrmR = 1,
- /// FrmI - This form is for instructions of the format I.
- FrmI = 2,
- /// FrmJ - This form is for instructions of the format J.
- FrmJ = 3,
- /// FrmFR - This form is for instructions of the format FR.
- FrmFR = 4,
- /// FrmFI - This form is for instructions of the format FI.
- FrmFI = 5,
- /// FrmOther - This form is for instructions that have no specific format.
- FrmOther = 6,
-
- FormMask = 15
- };
-}
-
class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
const MipsRegisterInfo RI;
+ unsigned UncondBrOpc;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
@@ -182,12 +103,6 @@ public:
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
-
- /// getGlobalBaseReg - Return a virtual register initialized with the
- /// the global base register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3fbd41ef6a3b..be74f8e5230c 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,8 +39,8 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2,
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
-def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, iPTR>]>;
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>,
+ SDTCisSameAs<0, 1>]>;
def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
@@ -103,11 +103,11 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
// target constant nodes that would otherwise remain unchanged with ADDiu
// nodes. Without these wrapper node patterns, the following conditional move
// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
-// compiled:
+// compiled:
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
-def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
// Pointer to dynamically allocated stack area.
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
@@ -128,18 +128,31 @@ def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">;
def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
// Instruction operand types
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
+def jmptarget : Operand<OtherVT> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def calltarget64: Operand<i64>;
def simm16 : Operand<i32>;
def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
@@ -167,6 +180,12 @@ def mem_ea : Operand<i32> {
let EncoderMethod = "getMemEncoding";
}
+def mem_ea_64 : Operand<i64> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
+}
+
// size operand of ext instruction
def size_ext : Operand<i32> {
let EncoderMethod = "getSizeExtEncoding";
@@ -179,12 +198,12 @@ def size_ins : Operand<i32> {
// Transformation Function - get the lower 16 bits.
def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+ return getImm(N, N->getZExtValue() & 0xFFFF);
}]>;
// Transformation Function - get the higher 16 bits.
def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
// Node immediate fits as 16-bit sign extended on target immediate.
@@ -202,36 +221,42 @@ def immZExt16 : PatLeaf<(imm), [{
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-// shamt field must fit in 5 bits.
-def immZExt5 : PatLeaf<(imm), [{
- return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared).
+def immLow16Zero : PatLeaf<(imm), [{
+ int64_t Val = N->getSExtValue();
+ return isInt<32>(Val) && !(Val & 0xffff);
}]>;
+// shamt field must fit in 5 bits.
+def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions from the stack use it.
-def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// Pattern fragment for load/store
//===----------------------------------------------------------------------===//
-class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+class UnalignedLoad<PatFrag Node> :
+ PatFrag<(ops node:$ptr), (Node node:$ptr), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment();
}]>;
-class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+class AlignedLoad<PatFrag Node> :
+ PatFrag<(ops node:$ptr), (Node node:$ptr), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment();
}]>;
-class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
- (Node node:$val, node:$ptr), [{
+class UnalignedStore<PatFrag Node> :
+ PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{
StoreSDNode *SD = cast<StoreSDNode>(N);
return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment();
}]>;
-class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
- (Node node:$val, node:$ptr), [{
+class AlignedStore<PatFrag Node> :
+ PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{
StoreSDNode *SD = cast<StoreSDNode>(N);
return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment();
}]>;
@@ -313,27 +338,34 @@ class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
}
// Shifts
-class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt),
+class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode, PatFrag PF, Operand ImmOpnd,
+ RegisterClass RC>:
+ FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
!strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
- let rs = _rs;
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> {
+ let rs = isRotate;
}
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+// 32-bit shift instructions.
+class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode>:
+ shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>;
+
+class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode, RegisterClass RC>:
+ FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
!strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+ [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> {
let shamt = isRotate;
}
// Load Upper Immediate
-class LoadUpper<bits<6> op, string instr_asm>:
- FI<op, (outs CPURegs:$rt), (ins uimm16:$imm16),
+class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>:
+ FI<op, (outs RC:$rt), (ins Imm:$imm16),
!strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> {
let rs = 0;
+ let neverHasSideEffects = 1;
}
class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -361,6 +393,14 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
+// Unaligned Memory Load/Store
+let canFoldAsLoad = 1 in
+class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
+
+class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
+
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -368,7 +408,7 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[NotN64]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
Requires<[IsN64]>;
-}
+}
// 64-bit load.
multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
@@ -377,8 +417,15 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[NotN64]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
Requires<[IsN64]>;
-}
+}
+// 32-bit load.
+multiclass LoadUnAlign32<bits<6> op> {
+ def #NAME# : LoadUnAlign<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : LoadUnAlign<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
// 32-bit store.
multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -397,11 +444,19 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[IsN64]>;
}
+// 32-bit store.
+multiclass StoreUnAlign32<bits<6> op> {
+ def #NAME# : StoreUnAlign<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : StoreUnAlign<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
+
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -409,9 +464,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
let rt = _rt;
let isBranch = 1;
let isTerminator = 1;
@@ -435,146 +490,228 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
[(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
IIAlu>;
-// Unconditional branch
-let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+// Jump
class JumpFJ<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins brtarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+ FJ<op, (outs), (ins jmptarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
+ let isBranch=1;
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocStatic];
+}
+
+// Unconditional branch
+class UncondBranch<bits<6> op, string instr_asm>:
+ BranchBase<op, (outs), (ins brtarget:$imm16),
+ !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
+ let rs = 0;
+ let rt = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC];
+}
-let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
-class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$rs),
- !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
+ isIndirectBranch = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> {
let rt = 0;
let rd = 0;
let shamt = 0;
}
// Jump and Link (Call)
-let isCall=1, hasDelaySlot=1,
- // All calls clobber the non-callee saved registers...
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
- K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
+let isCall=1, hasDelaySlot=1 in {
class JumpLink<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins calltarget:$target, variable_ops),
!strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
IIBranch>;
- class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> {
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
+ RegisterClass RC>:
+ FR<op, func, (outs), (ins RC:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
let rt = 0;
let rd = 31;
let shamt = 0;
}
- class BranchLink<string instr_asm>:
- FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$imm16, variable_ops),
- !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>;
+ class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
+ FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
+ let rt = _rt;
+ }
}
// Mul, Div
-class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+class Mult<bits<6> func, string instr_asm, InstrItinClass itin,
+ RegisterClass RC, list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
!strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
let rd = 0;
let shamt = 0;
let isCommutable = 1;
- let Defs = [HI, LO];
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
}
-class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt)], itin> {
+class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>:
+ Mult<func, instr_asm, itin, CPURegs, [HI, LO]>;
+
+class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin,
+ RegisterClass RC, list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
+ [(op RC:$rs, RC:$rt)], itin> {
let rd = 0;
let shamt = 0;
- let Defs = [HI, LO];
+ let Defs = DefRegs;
}
+class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>;
+
// Move from Hi/Lo
-class MoveFromLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPURegs:$rd), (ins),
+class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC,
+ list<Register> UseRegs>:
+ FR<0x00, func, (outs RC:$rd), (ins),
!strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
let rs = 0;
let rt = 0;
let shamt = 0;
+ let Uses = UseRegs;
+ let neverHasSideEffects = 1;
}
-class MoveToLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPURegs:$rs),
+class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
+ list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs),
!strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
let rt = 0;
let rd = 0;
let shamt = 0;
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
}
-class EffectiveAddress<string instr_asm> :
- FMem<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr),
- instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>;
+class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> :
+ FMem<0x09, (outs RC:$rt), (ins Mem:$addr),
+ instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
-class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
+class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"),
+ [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"),
+ [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
Requires<[HasBitCount]> {
let shamt = 0;
let rt = rd;
}
// Sign Extend in Register.
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>:
- FR<0x1f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt),
+class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
+ RegisterClass RC>:
+ FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt),
!strconcat(instr_asm, "\t$rd, $rt"),
- [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> {
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
let rs = 0;
let shamt = sa;
let Predicates = [HasSEInReg];
}
-// Byte Swap
-class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
- FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"),
- [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> {
+// Subword Swap
+class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
+ FR<0x1f, func, (outs RC:$rd), (ins RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
let rs = 0;
let shamt = sa;
let Predicates = [HasSwap];
+ let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
+class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
+ : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
// Ext and Ins
-class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
- list<dag> pattern, InstrItinClass itin>:
- FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- pattern, itin>, Requires<[HasMips32r2]> {
+class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+ let Predicates = [HasMips32r2];
+}
+
+class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt),
+ (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
+ NoItinerary> {
bits<5> pos;
bits<5> sz;
let rd = sz;
let shamt = pos;
+ let Predicates = [HasMips32r2];
+ let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, string Opstr> :
- MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
+ RegisterClass PRC> :
+ MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
!strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
- [(set CPURegs:$dst,
- (Op CPURegs:$ptr, CPURegs:$incr))]>;
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+
+multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]>;
+}
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, string Width> :
- MipsPseudo<(outs CPURegs:$dst),
- (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap),
- !strconcat("atomic_cmp_swap_", Width,
- "\t$dst, $ptr, $cmp, $swap"),
- [(set CPURegs:$dst,
- (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>;
+class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
+ RegisterClass PRC> :
+ MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+
+multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
+ def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]>;
+}
+
+class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
+ FMem<Opc, (outs RC:$rt), (ins Mem:$addr),
+ !strconcat(opstring, "\t$rt, $addr"), [], IILoad> {
+ let mayLoad = 1;
+}
+
+class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
+ FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr),
+ !strconcat(opstring, "\t$rt, $addr"), [], IIStore> {
+ let mayStore = 1;
+ let Constraints = "$rt = $dst";
+}
//===----------------------------------------------------------------------===//
// Pseudo instructions
@@ -590,52 +727,64 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// Some assembly macros need to avoid pseudoinstructions and assembler
-// automatic reodering, we should reorder ourselves.
-def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
-def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
-def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
-def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
-
-// These macros are inserted to prevent GAS from complaining
-// when using the AT register.
-def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
-def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
-
// When handling PIC code the assembler needs .cpload and .cprestore
// directives. If the real instructions corresponding to these directives
// are used, we have the same behavior, but also get a bunch of warnings
// from the assembler.
-def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
+let neverHasSideEffects = 1 in
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp),
+ ".cprestore\t$loc", []>;
+
+// For O32 ABI & PIC & non-fixed global base register, the following instruction
+// sequence is emitted to set the global base register:
+//
+// 0. lui $2, %hi(_gp_disp)
+// 1. addiu $2, $2, %lo(_gp_disp)
+// 2. addu $globalbasereg, $2, $t9
+//
+// SETGP01 is emitted during Prologue/Epilogue insertion and then converted to
+// instructions 0 and 1 in the sequence above during MC lowering.
+// SETGP2 is emitted just before register allocation and converted to
+// instruction 2 just prior to post-RA scheduling.
+//
+// These pseudo instructions are needed to ensure no instructions are inserted
+// before or between instructions 0 and 1, which is a limitation imposed by
+// GNU linker.
+
+let isTerminator = 1, isBarrier = 1 in
+def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>;
+
+let neverHasSideEffects = 1 in
+def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "",
+ []>;
let usesCustomInserter = 1 in {
- def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">;
- def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">;
- def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">;
- def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">;
- def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">;
- def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">;
- def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">;
- def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">;
- def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">;
- def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">;
- def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">;
- def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">;
- def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">;
- def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">;
- def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">;
- def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">;
- def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">;
- def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">;
-
- def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">;
- def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">;
- def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">;
-
- def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">;
- def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">;
- def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">;
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">;
}
//===----------------------------------------------------------------------===//
@@ -654,7 +803,7 @@ def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
-def LUi : LoadUpper<0x0f, "lui">;
+def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>;
/// Arithmetic Instructions (3-Operand, R-Type)
def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
@@ -669,17 +818,17 @@ def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
/// Shift Instructions
-def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
-def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
-def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
-def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
-def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
-def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>;
+def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>;
+def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>;
+def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>;
+def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
+def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
// Rotate Instructions
let Predicates = [HasMips32r2] in {
- def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
- def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+ def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
}
/// Load and Store Instructions
@@ -700,6 +849,12 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+/// Primitives for unaligned load/store
+defm LWL : LoadUnAlign32<0x22>;
+defm LWR : LoadUnAlign32<0x26>;
+defm SWL : StoreUnAlign32<0x2A>;
+defm SWR : StoreUnAlign32<0x2E>;
+
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
[(MipsSync imm:$stype)], NoItinerary, FrmOther>
@@ -712,19 +867,15 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
}
/// Load-linked, Store-conditional
-let mayLoad = 1 in
- def LL : FMem<0x30, (outs CPURegs:$rt), (ins mem:$addr),
- "ll\t$rt, $addr", [], IILoad>;
-let mayStore = 1, Constraints = "$rt = $dst" in
- def SC : FMem<0x38, (outs CPURegs:$dst), (ins CPURegs:$rt, mem:$addr),
- "sc\t$rt, $addr", [], IIStore>;
+def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>;
+def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>;
+def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>;
+def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-let isIndirectBranch = 1 in
- def JR : JumpFR<0x00, 0x08, "jr">;
-def JAL : JumpLink<0x03, "jal">;
-def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
+def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
@@ -732,10 +883,10 @@ def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
-let rt=0x11 in
- def BGEZAL : BranchLink<"bgezal">;
-let rt=0x10 in
- def BLTZAL : BranchLink<"bltzal">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
+def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
+def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
let isReturn=1, isTerminator=1, hasDelaySlot=1,
isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in
@@ -743,50 +894,26 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
"jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
/// Multiply and Divide Instructions.
-def MULT : Mul<0x18, "mult", IIImul>;
-def MULTu : Mul<0x19, "multu", IIImul>;
-def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
-def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+def MULT : Mult32<0x18, "mult", IIImul>;
+def MULTu : Mult32<0x19, "multu", IIImul>;
+def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>;
-let Defs = [HI] in
- def MTHI : MoveToLOHI<0x11, "mthi">;
-let Defs = [LO] in
- def MTLO : MoveToLOHI<0x13, "mtlo">;
-
-let Uses = [HI] in
- def MFHI : MoveFromLOHI<0x10, "mfhi">;
-let Uses = [LO] in
- def MFLO : MoveFromLOHI<0x12, "mflo">;
+def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>;
+def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>;
+def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>;
+def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<0x10, "seb", i8>;
-def SEH : SignExtInReg<0x18, "seh", i16>;
+def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>;
+def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>;
/// Count Leading
-def CLZ : CountLeading<0x20, "clz",
- [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>;
-def CLO : CountLeading<0x21, "clo",
- [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>;
-
-/// Byte Swap
-def WSBW : ByteSwap<0x20, 0x2, "wsbw">;
-
-// Conditional moves:
-// These instructions are expanded in
-// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
-// conditional move instructions.
-// flag:int, data:int
-class CondMovIntInt<bits<6> funct, string instr_asm> :
- FR<0, funct, (outs CPURegs:$rd),
- (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
- let shamt = 0;
- let usesCustomInserter = 1;
- let Constraints = "$F = $rd";
-}
+def CLZ : CountLeading0<0x20, "clz", CPURegs>;
+def CLO : CountLeading1<0x21, "clo", CPURegs>;
-def MOVZ_I : CondMovIntInt<0x0a, "movz">;
-def MOVN_I : CondMovIntInt<0x0b, "movn">;
+/// Word Swap Bytes Within Halfwords
+def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>;
/// No operation
let addr=0 in
@@ -796,13 +923,13 @@ let addr=0 in
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -815,21 +942,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
Requires<[HasMips32]>;
-def RDHWR : ReadHardware;
+def RDHWR : ReadHardware<CPURegs, HWRegs>;
-def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz),
- [(set CPURegs:$rt,
- (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
- NoItinerary>;
-
-let Constraints = "$src = $rt" in
-def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src),
- [(set CPURegs:$rt,
- (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
- CPURegs:$src))],
- NoItinerary>;
+def EXT : ExtBase<0, "ext", CPURegs>;
+def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -840,6 +956,8 @@ def : Pat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
def : Pat<(i32 immZExt16:$in),
(ORi ZERO, imm:$in)>;
+def : Pat<(i32 immLow16Zero:$in),
+ (LUi (HI16 imm:$in))>;
// Arbitrary immediates
def : Pat<(i32 imm:$imm),
@@ -864,22 +982,26 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
(ADDiu CPURegs:$hi, tblockaddress:$lo)>;
-
-def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
-
-def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
@@ -887,39 +1009,45 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
-// tlsgd
-def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
- (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
-
-// tprel hi/lo
-def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
-
// wrapper_pic
-class WrapperPICPat<SDNode node>:
- Pat<(MipsWrapperPIC node:$in),
- (ADDiu GP, node:$in)>;
+class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
+ Pat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
-def : WrapperPICPat<tglobaladdr>;
-def : WrapperPICPat<tconstpool>;
-def : WrapperPICPat<texternalsym>;
-def : WrapperPICPat<tblockaddress>;
-def : WrapperPICPat<tjumptable>;
+def : WrapperPat<tglobaladdr, ADDiu, CPURegs>;
+def : WrapperPat<tconstpool, ADDiu, CPURegs>;
+def : WrapperPat<texternalsym, ADDiu, CPURegs>;
+def : WrapperPat<tblockaddress, ADDiu, CPURegs>;
+def : WrapperPat<tjumptable, ADDiu, CPURegs>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
def : Pat<(not CPURegs:$in),
(NOR CPURegs:$in, ZERO)>;
-// extended load and stores
-def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>;
-def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>;
+// extended loads
+let Predicates = [NotN64] in {
+ def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+ def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+ def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
+ def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
+}
// peepholes
-def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+let Predicates = [NotN64] in {
+ def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+ def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
+ def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
+}
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
@@ -950,38 +1078,6 @@ def : Pat<(brcond RC:$cond, bb:$dst),
defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
-// select patterns
-multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
- def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
- def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F),
- (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
-}
-
-multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
- def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
- (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
- def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F),
- (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
-}
-
-defm : MovzPats<CPURegs, MOVZ_I>;
-defm : MovnPats<CPURegs, MOVN_I>;
-
// setcc patterns
multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
Instruction SLTuOp, Register ZEROReg> {
@@ -1029,10 +1125,14 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// select MipsDynAlloc
def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+// bswap pattern
+def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
include "MipsInstrFPU.td"
include "Mips64InstrInfo.td"
+include "MipsCondMov.td"
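Note: the new bswap pattern above maps (bswap $rt) to WSBH followed by a 16-bit rotate: WSBH swaps the bytes inside each halfword and ROTR then exchanges the two halfwords. A minimal host-side sketch of the same two steps (hypothetical helper names, only to check the composition, not the TableGen lowering itself):

  #include <cstdint>
  #include <cassert>

  // Model of the MIPS32r2 "wsbh" instruction: swap the bytes inside
  // each 16-bit halfword of a 32-bit word.
  static uint32_t wsbh(uint32_t v) {
    return ((v & 0x00ff00ffu) << 8) | ((v & 0xff00ff00u) >> 8);
  }

  // Model of "rotr $rd, $rs, 16": rotate right by 16 bits.
  static uint32_t rotr16(uint32_t v) {
    return (v >> 16) | (v << 16);
  }

  int main() {
    // wsbh followed by a 16-bit rotate yields a full byte swap, which is
    // what the (bswap ...) -> (ROTR (WSBH ...), 16) pattern relies on.
    assert(rotr16(wsbh(0x11223344u)) == 0x44332211u);
    return 0;
  }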
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index e3f6a753c406..76ca3e176727 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===//
+//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -200,7 +200,7 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
switch ((Mips::RelocationType) MR->getRelocationType()) {
- case Mips::reloc_mips_branch:
+ case Mips::reloc_mips_pc16:
ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
@@ -218,13 +218,16 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
- case Mips::reloc_mips_lo:
- ResultPtr = ResultPtr & 0xffff;
+ case Mips::reloc_mips_lo: {
+ // Addend is needed for unaligned load/store instructions, where offset
+ // for the second load/store in the expanded instruction sequence must
+ // be modified by +1 or +3. Otherwise, Addend is 0.
+ int Addend = *((unsigned*) RelocPos) & 0xffff;
+ ResultPtr = (ResultPtr + Addend) & 0xffff;
+ *((unsigned*) RelocPos) &= 0xffff0000;
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
-
- default:
- llvm_unreachable("ERROR: Unknown Mips relocation.");
+ }
}
}
}
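Note: the reloc_mips_lo change reads the addend that the emitter has already placed in the instruction's low 16 bits (+1 or +3 for the second access of an expanded unaligned load/store) and folds it into the resolved address before rewriting the immediate field. A small sketch of that fix-up, assuming a hypothetical applyMipsLo helper that receives the encoded instruction word and the resolved target address:

  #include <cstdint>

  // Sketch of the %lo relocation fix-up shown in MipsJITInfo::relocate.
  // InsnWord is the encoded instruction at the relocation site and
  // Target is the resolved symbol address (both hypothetical inputs).
  uint32_t applyMipsLo(uint32_t InsnWord, intptr_t Target) {
    int Addend = InsnWord & 0xffff;        // addend preset by the emitter
    uint32_t Lo = (Target + Addend) & 0xffff;
    return (InsnWord & 0xffff0000u) | Lo;  // rewrite only the imm16 field
  }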
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index 41f32a35f1b0..f4c4ae86d38d 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -1,4 +1,4 @@
-//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===//
+//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,8 +19,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
class MipsTargetMachine;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 608a7d21a4f9..1597b9334450 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -15,99 +15,179 @@
#include "MipsMCInstLower.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MipsMCSymbolRefExpr.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
-MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
- MipsAsmPrinter &asmprinter)
- : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
+ : AsmPrinter(asmprinter) {}
+
+void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) {
+ Mang = M;
+ Ctx = C;
+}
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy,
unsigned Offset) const {
- MipsMCSymbolRefExpr::VariantKind Kind;
+ MCSymbolRefExpr::VariantKind Kind;
const MCSymbol *Symbol;
switch(MO.getTargetFlags()) {
- default: assert(0 && "Invalid target flag!");
- case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
- case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
- case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
- case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
- case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
- case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
- case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
- case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
- case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
- case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
- case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
- case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
- case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break;
- case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
- case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break;
+ default: llvm_unreachable("Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
+ case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
+ case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
+ case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break;
}
switch (MOTy) {
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
- if (MO.getOffset())
- Offset += MO.getOffset();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset += MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Assume offset is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(AddExpr);
+}
+
+static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0,
+ const MCOperand& Opnd1,
+ const MCOperand& Opnd2 = MCOperand()) {
+ Inst.setOpcode(Opc);
+ Inst.addOperand(Opnd0);
+ Inst.addOperand(Opnd1);
+ if (Opnd2.isValid())
+ Inst.addOperand(Opnd2);
+}
+
+// Lower ".cpload $reg" to
+// "lui $gp, %hi(_gp_disp)"
+// "addiu $gp, $gp, %lo(_gp_disp)"
+// "addu $gp, $gp, $t9"
+void MipsMCInstLower::LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts) {
+ MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
+ MCOperand T9Reg = MCOperand::CreateReg(Mips::T9);
+ StringRef SymName("_gp_disp");
+ const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
+ const MCSymbolRefExpr *MCSym;
+
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
+ MCOperand SymHi = MCOperand::CreateExpr(MCSym);
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
+ MCOperand SymLo = MCOperand::CreateExpr(MCSym);
+
+ MCInsts.resize(3);
+
+ CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi);
+ CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo);
+ CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg);
+}
+
+// Lower ".cprestore offset" to "sw $gp, offset($sp)".
+void MipsMCInstLower::LowerCPRESTORE(int64_t Offset,
+ SmallVector<MCInst, 4>& MCInsts) {
+ assert(isInt<32>(Offset) && (Offset >= 0) &&
+ "Imm operand of .cprestore must be a non-negative 32-bit value.");
+
+ MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg;
+ MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
+
+ if (!isInt<16>(Offset)) {
+ unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff;
+ Offset &= 0xffff;
+ MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
+ BaseReg = ATReg;
+
+ // lui at,hi
+ // addu at,at,sp
+ MCInsts.resize(2);
+ CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi));
+ CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg);
}
-
- return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
- Ctx));
+
+ MCInst Sw;
+ CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset));
+ MCInsts.push_back(Sw);
}
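Note: when the .cprestore offset does not fit in a signed 16-bit immediate, LowerCPRESTORE first builds the base in $at with lui/addu and keeps only the low 16 bits for the final sw; because sw sign-extends its offset, the high part is computed as ((Offset + 0x8000) >> 16). A small self-check of that split (hypothetical splitOffset helper, not the MC lowering itself):

  #include <cstdint>
  #include <cassert>

  // Split used for ".cprestore Offset" when Offset does not fit in 16 bits:
  //   lui  $at, Hi
  //   addu $at, $at, $sp
  //   sw   $gp, Lo($at)
  // sw sign-extends Lo, so Hi is rounded up when bit 15 of Lo is set.
  void splitOffset(int64_t Offset, int64_t &Hi, int64_t &Lo) {
    Hi = ((Offset + 0x8000) >> 16) & 0xffff;
    Lo = (Offset & 0xffff) - ((Offset & 0x8000) << 1);  // sign-extended low part
  }

  int main() {
    int64_t Hi, Lo;
    splitOffset(0x18000, Hi, Lo);        // -> lui $at, 2 ; sw $gp, -32768($at)
    assert((Hi << 16) + Lo == 0x18000);
    return 0;
  }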
-MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
+ unsigned offset) const {
MachineOperandType MOTy = MO.getType();
-
+
switch (MOTy) {
- default:
- assert(0 && "unknown operand type");
- break;
+ default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) break;
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::CreateImm(MO.getImm() + offset);
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_BlockAddress:
- return LowerSymbolOperand(MO, MOTy, 0);
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
}
return MCOperand();
@@ -115,7 +195,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp = LowerOperand(MO);
@@ -124,3 +204,140 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.addOperand(MCOp);
}
}
+
+void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI,
+ SmallVector<MCInst,
+ 4>& MCInsts) {
+ unsigned Opc = MI->getOpcode();
+ MCInst Instr1, Instr2, Instr3, Move;
+
+ bool TwoInstructions = false;
+
+ assert(MI->getNumOperands() == 3);
+ assert(MI->getOperand(0).isReg());
+ assert(MI->getOperand(1).isReg());
+
+ MCOperand Target = LowerOperand(MI->getOperand(0));
+ MCOperand Base = LowerOperand(MI->getOperand(1));
+ MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
+ MCOperand ZeroReg = MCOperand::CreateReg(Mips::ZERO);
+
+ MachineOperand UnLoweredName = MI->getOperand(2);
+ MCOperand Name = LowerOperand(UnLoweredName);
+
+ Move.setOpcode(Mips::ADDu);
+ Move.addOperand(Target);
+ Move.addOperand(ATReg);
+ Move.addOperand(ZeroReg);
+
+ switch (Opc) {
+ case Mips::ULW: {
+ // FIXME: only works for little endian right now
+ MCOperand AdjName = LowerOperand(UnLoweredName, 3);
+ if (Base.getReg() == (Target.getReg())) {
+ Instr1.setOpcode(Mips::LWL);
+ Instr1.addOperand(ATReg);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LWR);
+ Instr2.addOperand(ATReg);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ Instr3 = Move;
+ } else {
+ TwoInstructions = true;
+ Instr1.setOpcode(Mips::LWL);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LWR);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ }
+ break;
+ }
+ case Mips::ULHu: {
+ // FIXME: only works for little endian right now
+ MCOperand AdjName = LowerOperand(UnLoweredName, 1);
+ Instr1.setOpcode(Mips::LBu);
+ Instr1.addOperand(ATReg);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LBu);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ Instr3.setOpcode(Mips::INS);
+ Instr3.addOperand(Target);
+ Instr3.addOperand(ATReg);
+ Instr3.addOperand(MCOperand::CreateImm(0x8));
+ Instr3.addOperand(MCOperand::CreateImm(0x18));
+ break;
+ }
+
+ case Mips::USW: {
+ // FIXME: only works for little endian right now
+ assert (Base.getReg() != Target.getReg());
+ TwoInstructions = true;
+ MCOperand AdjName = LowerOperand(UnLoweredName, 3);
+ Instr1.setOpcode(Mips::SWL);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::SWR);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ break;
+ }
+ case Mips::USH: {
+ MCOperand AdjName = LowerOperand(UnLoweredName, 1);
+ Instr1.setOpcode(Mips::SB);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(Name);
+ Instr2.setOpcode(Mips::SRL);
+ Instr2.addOperand(ATReg);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(MCOperand::CreateImm(8));
+ Instr3.setOpcode(Mips::SB);
+ Instr3.addOperand(ATReg);
+ Instr3.addOperand(Base);
+ Instr3.addOperand(AdjName);
+ break;
+ }
+ default:
+ // FIXME: need to add others
+ llvm_unreachable("unaligned instruction not processed");
+ }
+
+ MCInsts.push_back(Instr1);
+ MCInsts.push_back(Instr2);
+ if (!TwoInstructions) MCInsts.push_back(Instr3);
+}
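Note: for little-endian targets the ULHu case above loads the two bytes separately and merges them with INS (pos 8, size 24), which also clears the upper bits so the result stays zero-extended. A host-side model of that expansion, with a hypothetical expandULHu helper standing in for the emitted lbu/lbu/ins sequence:

  #include <cstdint>
  #include <cassert>

  // Little-endian model of the ULHu expansion:
  //   lbu $at, off+1($base)   // high byte
  //   lbu $rt, off($base)     // low byte
  //   ins $rt, $at, 8, 24     // insert $at into bits 8..31 of $rt
  uint32_t expandULHu(const uint8_t *Mem, unsigned Off) {
    uint32_t At = Mem[Off + 1];
    uint32_t Rt = Mem[Off];
    Rt = (Rt & 0x000000ffu) | (At << 8);   // the INS step
    return Rt;
  }

  int main() {
    uint8_t Buf[] = { 0xaa, 0x34, 0x12, 0xbb };  // unaligned 0x1234 at offset 1
    assert(expandULHu(Buf, 1) == 0x1234u);
    return 0;
  }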
+
+// Convert
+// "setgp01 $reg"
+// to
+// "lui $reg, %hi(_gp_disp)"
+// "addiu $reg, $reg, %lo(_gp_disp)"
+void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts) {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isReg());
+ MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg());
+ StringRef SymName("_gp_disp");
+ const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
+ const MCSymbolRefExpr *MCSym;
+
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
+ MCOperand SymHi = MCOperand::CreateExpr(MCSym);
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
+ MCOperand SymLo = MCOperand::CreateExpr(MCSym);
+
+ MCInsts.resize(2);
+
+ CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi);
+ CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo);
+}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 223f23aed286..c1d007d2f539 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==//
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,35 +9,39 @@
#ifndef MIPSMCINSTLOWER_H
#define MIPSMCINSTLOWER_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
- class MCSymbol;
class MachineInstr;
class MachineFunction;
class Mangler;
class MipsAsmPrinter;
-
+
/// MipsMCInstLower - This class is used to lower an MachineInstr into an
// MCInst.
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
typedef MachineOperand::MachineOperandType MachineOperandType;
- MCContext &Ctx;
+ MCContext *Ctx;
Mangler *Mang;
MipsAsmPrinter &AsmPrinter;
public:
- MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
- MipsAsmPrinter &asmprinter);
+ MipsMCInstLower(MipsAsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext* C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts);
+ void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts);
+ void LowerUnalignedLoadStore(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts);
+ void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
- MCOperand LowerOperand(const MachineOperand& MO) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
};
}
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
deleted file mode 100644
index a0a242c8c443..000000000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mipsmcsymbolrefexpr"
-#include "MipsMCSymbolRefExpr.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-using namespace llvm;
-
-const MipsMCSymbolRefExpr*
-MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
- int Offset, MCContext &Ctx) {
- return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
-}
-
-void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
- switch (Kind) {
- default: assert(0 && "Invalid kind!");
- case VK_Mips_None: break;
- case VK_Mips_GPREL: OS << "%gp_rel("; break;
- case VK_Mips_GOT_CALL: OS << "%call16("; break;
- case VK_Mips_GOT: OS << "%got("; break;
- case VK_Mips_ABS_HI: OS << "%hi("; break;
- case VK_Mips_ABS_LO: OS << "%lo("; break;
- case VK_Mips_TLSGD: OS << "%tlsgd("; break;
- case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
- case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
- case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
- case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
- case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
- case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
- case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
- case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
- }
-
- OS << *Symbol;
-
- if (Offset) {
- if (Offset > 0)
- OS << '+';
- OS << Offset;
- }
-
- if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
- OS << ")))";
- else if (Kind != VK_Mips_None)
- OS << ')';
-}
-
-bool
-MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return false;
-}
-
-void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
- Asm->getOrCreateSymbolData(*Symbol);
-}
-
-const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
- return Symbol->isDefined() ? &Symbol->getSection() : NULL;
-}
-
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
deleted file mode 100644
index 55e85a79c1c8..000000000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSMCSYMBOLREFEXPR_H
-#define MIPSMCSYMBOLREFEXPR_H
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-
-class MipsMCSymbolRefExpr : public MCTargetExpr {
-public:
- enum VariantKind {
- VK_Mips_None,
- VK_Mips_GPREL,
- VK_Mips_GOT_CALL,
- VK_Mips_GOT,
- VK_Mips_ABS_HI,
- VK_Mips_ABS_LO,
- VK_Mips_TLSGD,
- VK_Mips_GOTTPREL,
- VK_Mips_TPREL_HI,
- VK_Mips_TPREL_LO,
- VK_Mips_GPOFF_HI,
- VK_Mips_GPOFF_LO,
- VK_Mips_GOT_DISP,
- VK_Mips_GOT_PAGE,
- VK_Mips_GOT_OFST
- };
-
-private:
- const VariantKind Kind;
- const MCSymbol *Symbol;
- int Offset;
-
- explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
- int _Offset)
- : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
-
-public:
- static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
- const MCSymbol *Symbol, int Offset,
- MCContext &Ctx);
-
- void PrintImpl(raw_ostream &OS) const;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const;
-
- static bool classof(const MCExpr *E) {
- return E->getKind() == MCExpr::Target;
- }
-
- static bool classof(const MipsMCSymbolRefExpr *) { return true; }
-
- int getOffset() const { return Offset; }
- void setOffset(int O) { Offset = O; }
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
new file mode 100644
index 000000000000..b00c62b09f4c
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -0,0 +1,50 @@
+//===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMachineFunction.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
+ cl::desc("Always use $gp as the global base register."));
+
+bool MipsFunctionInfo::globalBaseRegFixed() const {
+ return FixGlobalBaseReg;
+}
+
+bool MipsFunctionInfo::globalBaseRegSet() const {
+ return GlobalBaseReg;
+}
+
+unsigned MipsFunctionInfo::getGlobalBaseReg() {
+ // Return if it has already been initialized.
+ if (GlobalBaseReg)
+ return GlobalBaseReg;
+
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+
+ if (FixGlobalBaseReg) // $gp is the global base register.
+ return GlobalBaseReg = ST.isABI_N64() ? Mips::GP_64 : Mips::GP;
+
+ const TargetRegisterClass *RC;
+ RC = ST.isABI_N64() ?
+ Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
+
+ return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
+}
+
+void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index bc30b6b2425b..0fde55cb62e8 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,19 +14,17 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
-#include <utility>
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <utility>
namespace llvm {
/// MipsFunctionInfo - This class is derived from MachineFunction private
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
-private:
MachineFunction& MF;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
@@ -45,18 +43,20 @@ private:
// InArgFIRange: Range of indices of all frame objects created during call to
// LowerFormalArguments.
// OutArgFIRange: Range of indices of all frame objects created during call to
- // LowerCall except for the frame object for restoring $gp.
+ // LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
- int GPFI; // Index of the frame object for restoring $gp
- mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
+ int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
+ bool EmitNOAT;
+
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
- MaxCallFrameSize(0)
+ MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -64,7 +64,7 @@ public:
}
void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
- bool isOutArgFI(int FI) const {
+ bool isOutArgFI(int FI) const {
return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
}
void extendOutArgFIRange(int FirstFI, int LastFI) {
@@ -92,14 +92,18 @@ public:
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+ bool globalBaseRegFixed() const;
+ bool globalBaseRegSet() const;
+ unsigned getGlobalBaseReg();
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+ bool getEmitNOAT() const { return EmitNOAT; }
+ void setEmitNOAT() { EmitNOAT = true; }
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index f8c0fdac8cf0..f30de449f6d5 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.cpp - MIPS Register Information -== -----*- C++ -*-===//
+//===-- MipsRegisterInfo.cpp - MIPS Register Information -== --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,10 @@
#define DEBUG_TYPE "mips-reg-info"
+#include "MipsRegisterInfo.h"
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsSubtarget.h"
-#include "MipsRegisterInfo.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -45,97 +46,6 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
: MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// Mips::RA, return the number that it corresponds to (e.g. 31).
-unsigned MipsRegisterInfo::
-getRegisterNumbering(unsigned RegEnum)
-{
- switch (RegEnum) {
- case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0:
- return 0;
- case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
- return 1;
- case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1:
- return 2;
- case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
- return 3;
- case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
- case Mips::D2:
- return 4;
- case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
- return 5;
- case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
- case Mips::D3:
- return 6;
- case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
- return 7;
- case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
- case Mips::D4:
- return 8;
- case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
- return 9;
- case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
- case Mips::D5:
- return 10;
- case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
- return 11;
- case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
- case Mips::D6:
- return 12;
- case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
- return 13;
- case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
- case Mips::D7:
- return 14;
- case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
- return 15;
- case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
- case Mips::D8:
- return 16;
- case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
- return 17;
- case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
- case Mips::D9:
- return 18;
- case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
- return 19;
- case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
- case Mips::D10:
- return 20;
- case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
- return 21;
- case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
- case Mips::D11:
- return 22;
- case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
- return 23;
- case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
- case Mips::D12:
- return 24;
- case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
- return 25;
- case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
- case Mips::D13:
- return 26;
- case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
- return 27;
- case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
- case Mips::D14:
- return 28;
- case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
- return 29;
- case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
- return 30;
- case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
- return 31;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
@@ -143,71 +53,55 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
/// Mips Callee Saved Registers
-const unsigned* MipsRegisterInfo::
+const uint16_t* MipsRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const
{
- // Mips callee-save register range is $16-$23, $f20-$f30
- static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
- Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
- Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
- Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
- Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
- };
-
- static const unsigned Mips32CalleeSavedRegs[] = {
- Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10,
- Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
- Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
- };
-
- static const unsigned N32CalleeSavedRegs[] = {
- Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64,
- Mips::D21_64,
- Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
- Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
- Mips::S0_64, 0
- };
+ if (Subtarget.isSingleFloat())
+ return CSR_SingleFloatOnly_SaveList;
+ else if (!Subtarget.hasMips64())
+ return CSR_O32_SaveList;
+ else if (Subtarget.isABI_N32())
+ return CSR_N32_SaveList;
- static const unsigned N64CalleeSavedRegs[] = {
- Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64,
- Mips::D26_64, Mips::D25_64, Mips::D24_64,
- Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
- Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
- Mips::S0_64, 0
- };
+ assert(Subtarget.isABI_N64());
+ return CSR_N64_SaveList;
+}
+const uint32_t*
+MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
+{
if (Subtarget.isSingleFloat())
- return SingleFloatOnlyCalleeSavedRegs;
+ return CSR_SingleFloatOnly_RegMask;
else if (!Subtarget.hasMips64())
- return Mips32CalleeSavedRegs;
+ return CSR_O32_RegMask;
else if (Subtarget.isABI_N32())
- return N32CalleeSavedRegs;
-
+ return CSR_N32_RegMask;
+
assert(Subtarget.isABI_N64());
- return N64CalleeSavedRegs;
+ return CSR_N64_RegMask;
}
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- static const unsigned ReservedCPURegs[] = {
- Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
- Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0
+ static const uint16_t ReservedCPURegs[] = {
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
+ Mips::SP, Mips::FP, Mips::RA
};
- static const unsigned ReservedCPU64Regs[] = {
- Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
- Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0
+ static const uint16_t ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
+ Mips::SP_64, Mips::FP_64, Mips::RA_64
};
BitVector Reserved(getNumRegs());
typedef TargetRegisterClass::iterator RegIter;
- for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg)
- Reserved.set(*Reg);
+ for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
+ Reserved.set(ReservedCPURegs[I]);
if (Subtarget.hasMips64()) {
- for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg)
- Reserved.set(*Reg);
+ for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I)
+ Reserved.set(ReservedCPU64Regs[I]);
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
@@ -224,10 +118,25 @@ getReservedRegs(const MachineFunction &MF) const {
Reg != Mips::FGR64RegisterClass->end(); ++Reg)
Reserved.set(*Reg);
}
-
+
+ // If GP is dedicated as a global base register, reserve it.
+ if (MF.getInfo<MipsFunctionInfo>()->globalBaseRegFixed()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
+ // Reserve hardware registers.
+ Reserved.set(Mips::HWR29);
+ Reserved.set(Mips::HWR29_64);
+
return Reserved;
}
+bool
+MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
// This function eliminate ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
void MipsRegisterInfo::
@@ -259,8 +168,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
errs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
- int stackSize = MF.getFrameInfo()->getStackSize();
- int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ uint64_t stackSize = MF.getFrameInfo()->getStackSize();
+ int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
@@ -279,52 +188,71 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
- // Everything else is referenced relative to whatever register
+ // Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
(FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Mips::SP;
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
- FrameReg = getFrameRegister(MF);
-
+ FrameReg = getFrameRegister(MF);
+
// Calculate final offset.
// - There is no need to change the offset if the frame object is one of the
// following: an outgoing argument, pointer to a dynamically allocated
// stack space or a $gp restore location,
// - If the frame object is any of the following, its offset must be adjusted
// by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- int Offset;
+ // incoming argument, callee-saved register location or local variable.
+ int64_t Offset;
if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
MipsFI->isDynAllocFI(FrameIndex))
Offset = spOffset;
else
- Offset = spOffset + stackSize;
+ Offset = spOffset + (int64_t)stackSize;
Offset += MI.getOperand(i+1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
// If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
- ((Offset & 0x8000) != 0);
-
- // FIXME: change this when mips goes MC".
- BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
- BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
- BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
- .addReg(Mips::AT);
- FrameReg = Mips::AT;
- Offset = (short)(Offset & 0xffff);
-
- BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ MipsAnalyzeImmediate AnalyzeImm;
+ unsigned Size = Subtarget.isABI_N64() ? 64 : 32;
+ unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi;
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ MipsFI->setEmitNOAT();
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ // Build the remaining instructions in Seq except for the last one.
+ for (++Inst; Inst != Seq.end() - 1; ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+
+ FrameReg = ATReg;
+ Offset = SignExtend64<16>(Inst->ImmOpnd);
}
MI.getOperand(i).ChangeToRegister(FrameReg, false);
@@ -334,18 +262,18 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool IsN64 = Subtarget.isABI_N64();
- return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+ return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) :
+ (IsN64 ? Mips::SP_64 : Mips::SP);
}
unsigned MipsRegisterInfo::
getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned MipsRegisterInfo::
getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
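Note: eliminateFrameIndex now handles offsets outside the signed 16-bit range by asking MipsAnalyzeImmediate for an instruction sequence, emitting everything but the final addiu into $at, adding the frame register, and folding the last 16-bit chunk into the memory operand. A simplified host-side model of the common lui + addu + 16-bit-remainder shape (the real sequence comes from MipsAnalyzeImmediate::Analyze and may differ; lowerLargeOffset is a hypothetical name):

  #include <cstdint>
  #include <cassert>

  // Effective address produced for a frame offset that does not fit in
  // a signed 16-bit immediate (32-bit case, common shape only).
  int64_t lowerLargeOffset(int64_t FrameRegValue, int32_t Offset) {
    int32_t Hi = (Offset + 0x8000) >> 16;                  // lui  $at, Hi
    int64_t AT = (int64_t(Hi) << 16) + FrameRegValue;      // addu $at, $frame, $at
    int32_t Lo = (Offset & 0xffff) - ((Offset & 0x8000) << 1); // last 16-bit chunk,
    return AT + Lo;                                        // folded into the mem operand
  }

  int main() {
    assert(lowerLargeOffset(0x7fff0000LL, 0x12345) == 0x7fff0000LL + 0x12345);
    assert(lowerLargeOffset(0, 0x18000) == 0x18000);
    return 0;
  }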
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 67e57dd71bdd..0716d29b2f38 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//===-- MipsRegisterInfo.h - Mips Register Information Impl -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -42,10 +42,13 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 925ad9e70ab6..ce399a031201 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
+//===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,6 +50,7 @@ class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
let SubRegIndices = [sub_fpeven, sub_fpodd];
+ let CoveredBySubRegs = 1;
}
class AFPR64<bits<5> num, string n, list<Register> subregs>
@@ -68,8 +69,6 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
- // FIXME: Fix DwarfRegNum.
-
// General Purpose Registers
def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
@@ -105,38 +104,38 @@ let Namespace = "Mips" in {
def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
// General Purpose 64-bit Registers
- def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
- def AT_64 : Mips64GPRReg< 1, "AT", [AT]>;
- def V0_64 : Mips64GPRReg< 2, "2", [V0]>;
- def V1_64 : Mips64GPRReg< 3, "3", [V1]>;
- def A0_64 : Mips64GPRReg< 4, "4", [A0]>;
- def A1_64 : Mips64GPRReg< 5, "5", [A1]>;
- def A2_64 : Mips64GPRReg< 6, "6", [A2]>;
- def A3_64 : Mips64GPRReg< 7, "7", [A3]>;
- def T0_64 : Mips64GPRReg< 8, "8", [T0]>;
- def T1_64 : Mips64GPRReg< 9, "9", [T1]>;
- def T2_64 : Mips64GPRReg< 10, "10", [T2]>;
- def T3_64 : Mips64GPRReg< 11, "11", [T3]>;
- def T4_64 : Mips64GPRReg< 12, "12", [T4]>;
- def T5_64 : Mips64GPRReg< 13, "13", [T5]>;
- def T6_64 : Mips64GPRReg< 14, "14", [T6]>;
- def T7_64 : Mips64GPRReg< 15, "15", [T7]>;
- def S0_64 : Mips64GPRReg< 16, "16", [S0]>;
- def S1_64 : Mips64GPRReg< 17, "17", [S1]>;
- def S2_64 : Mips64GPRReg< 18, "18", [S2]>;
- def S3_64 : Mips64GPRReg< 19, "19", [S3]>;
- def S4_64 : Mips64GPRReg< 20, "20", [S4]>;
- def S5_64 : Mips64GPRReg< 21, "21", [S5]>;
- def S6_64 : Mips64GPRReg< 22, "22", [S6]>;
- def S7_64 : Mips64GPRReg< 23, "23", [S7]>;
- def T8_64 : Mips64GPRReg< 24, "24", [T8]>;
- def T9_64 : Mips64GPRReg< 25, "25", [T9]>;
- def K0_64 : Mips64GPRReg< 26, "26", [K0]>;
- def K1_64 : Mips64GPRReg< 27, "27", [K1]>;
- def GP_64 : Mips64GPRReg< 28, "GP", [GP]>;
- def SP_64 : Mips64GPRReg< 29, "SP", [SP]>;
- def FP_64 : Mips64GPRReg< 30, "FP", [FP]>;
- def RA_64 : Mips64GPRReg< 31, "RA", [RA]>;
+ def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>, DwarfRegNum<[0]>;
+ def AT_64 : Mips64GPRReg< 1, "AT", [AT]>, DwarfRegNum<[1]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>, DwarfRegNum<[5]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>, DwarfRegNum<[6]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>, DwarfRegNum<[7]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>, DwarfRegNum<[8]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>, DwarfRegNum<[9]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>, DwarfRegNum<[10]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>, DwarfRegNum<[11]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>, DwarfRegNum<[12]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>, DwarfRegNum<[13]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>, DwarfRegNum<[14]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>, DwarfRegNum<[15]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>, DwarfRegNum<[16]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>, DwarfRegNum<[17]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>, DwarfRegNum<[18]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>, DwarfRegNum<[19]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>, DwarfRegNum<[20]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>, DwarfRegNum<[21]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>, DwarfRegNum<[22]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>, DwarfRegNum<[23]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>, DwarfRegNum<[24]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>;
+ def GP_64 : Mips64GPRReg< 28, "GP", [GP]>, DwarfRegNum<[28]>;
+ def SP_64 : Mips64GPRReg< 29, "SP", [SP]>, DwarfRegNum<[29]>;
+ def FP_64 : Mips64GPRReg< 30, "FP", [FP]>, DwarfRegNum<[30]>;
+ def RA_64 : Mips64GPRReg< 31, "RA", [RA]>, DwarfRegNum<[31]>;
/// Mips Single point precision FPU Registers
def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
@@ -192,38 +191,38 @@ let Namespace = "Mips" in {
def D15 : AFPR<30, "F30", [F30, F31]>;
/// Mips Double point precision FPU Registers in MFP64 mode.
- def D0_64 : AFPR64<0, "F0", [F0]>;
- def D1_64 : AFPR64<1, "F1", [F1]>;
- def D2_64 : AFPR64<2, "F2", [F2]>;
- def D3_64 : AFPR64<3, "F3", [F3]>;
- def D4_64 : AFPR64<4, "F4", [F4]>;
- def D5_64 : AFPR64<5, "F5", [F5]>;
- def D6_64 : AFPR64<6, "F6", [F6]>;
- def D7_64 : AFPR64<7, "F7", [F7]>;
- def D8_64 : AFPR64<8, "F8", [F8]>;
- def D9_64 : AFPR64<9, "F9", [F9]>;
- def D10_64 : AFPR64<10, "F10", [F10]>;
- def D11_64 : AFPR64<11, "F11", [F11]>;
- def D12_64 : AFPR64<12, "F12", [F12]>;
- def D13_64 : AFPR64<13, "F13", [F13]>;
- def D14_64 : AFPR64<14, "F14", [F14]>;
- def D15_64 : AFPR64<15, "F15", [F15]>;
- def D16_64 : AFPR64<16, "F16", [F16]>;
- def D17_64 : AFPR64<17, "F17", [F17]>;
- def D18_64 : AFPR64<18, "F18", [F18]>;
- def D19_64 : AFPR64<19, "F19", [F19]>;
- def D20_64 : AFPR64<20, "F20", [F20]>;
- def D21_64 : AFPR64<21, "F21", [F21]>;
- def D22_64 : AFPR64<22, "F22", [F22]>;
- def D23_64 : AFPR64<23, "F23", [F23]>;
- def D24_64 : AFPR64<24, "F24", [F24]>;
- def D25_64 : AFPR64<25, "F25", [F25]>;
- def D26_64 : AFPR64<26, "F26", [F26]>;
- def D27_64 : AFPR64<27, "F27", [F27]>;
- def D28_64 : AFPR64<28, "F28", [F28]>;
- def D29_64 : AFPR64<29, "F29", [F29]>;
- def D30_64 : AFPR64<30, "F30", [F30]>;
- def D31_64 : AFPR64<31, "F31", [F31]>;
+ def D0_64 : AFPR64<0, "F0", [F0]>, DwarfRegNum<[32]>;
+ def D1_64 : AFPR64<1, "F1", [F1]>, DwarfRegNum<[33]>;
+ def D2_64 : AFPR64<2, "F2", [F2]>, DwarfRegNum<[34]>;
+ def D3_64 : AFPR64<3, "F3", [F3]>, DwarfRegNum<[35]>;
+ def D4_64 : AFPR64<4, "F4", [F4]>, DwarfRegNum<[36]>;
+ def D5_64 : AFPR64<5, "F5", [F5]>, DwarfRegNum<[37]>;
+ def D6_64 : AFPR64<6, "F6", [F6]>, DwarfRegNum<[38]>;
+ def D7_64 : AFPR64<7, "F7", [F7]>, DwarfRegNum<[39]>;
+ def D8_64 : AFPR64<8, "F8", [F8]>, DwarfRegNum<[40]>;
+ def D9_64 : AFPR64<9, "F9", [F9]>, DwarfRegNum<[41]>;
+ def D10_64 : AFPR64<10, "F10", [F10]>, DwarfRegNum<[42]>;
+ def D11_64 : AFPR64<11, "F11", [F11]>, DwarfRegNum<[43]>;
+ def D12_64 : AFPR64<12, "F12", [F12]>, DwarfRegNum<[44]>;
+ def D13_64 : AFPR64<13, "F13", [F13]>, DwarfRegNum<[45]>;
+ def D14_64 : AFPR64<14, "F14", [F14]>, DwarfRegNum<[46]>;
+ def D15_64 : AFPR64<15, "F15", [F15]>, DwarfRegNum<[47]>;
+ def D16_64 : AFPR64<16, "F16", [F16]>, DwarfRegNum<[48]>;
+ def D17_64 : AFPR64<17, "F17", [F17]>, DwarfRegNum<[49]>;
+ def D18_64 : AFPR64<18, "F18", [F18]>, DwarfRegNum<[50]>;
+ def D19_64 : AFPR64<19, "F19", [F19]>, DwarfRegNum<[51]>;
+ def D20_64 : AFPR64<20, "F20", [F20]>, DwarfRegNum<[52]>;
+ def D21_64 : AFPR64<21, "F21", [F21]>, DwarfRegNum<[53]>;
+ def D22_64 : AFPR64<22, "F22", [F22]>, DwarfRegNum<[54]>;
+ def D23_64 : AFPR64<23, "F23", [F23]>, DwarfRegNum<[55]>;
+ def D24_64 : AFPR64<24, "F24", [F24]>, DwarfRegNum<[56]>;
+ def D25_64 : AFPR64<25, "F25", [F25]>, DwarfRegNum<[57]>;
+ def D26_64 : AFPR64<26, "F26", [F26]>, DwarfRegNum<[58]>;
+ def D27_64 : AFPR64<27, "F27", [F27]>, DwarfRegNum<[59]>;
+ def D28_64 : AFPR64<28, "F28", [F28]>, DwarfRegNum<[60]>;
+ def D29_64 : AFPR64<29, "F29", [F29]>, DwarfRegNum<[61]>;
+ def D30_64 : AFPR64<30, "F30", [F30]>, DwarfRegNum<[62]>;
+ def D31_64 : AFPR64<31, "F31", [F31]>, DwarfRegNum<[63]>;
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
@@ -239,6 +238,7 @@ let Namespace = "Mips" in {
// Hardware register $29
def HWR29 : Register<"29">;
+ def HWR29_64 : Register<"29">;
}
//===----------------------------------------------------------------------===//
@@ -301,3 +301,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
+def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+
diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h
index 66d1bfd993f5..0787ed399d5f 100644
--- a/lib/Target/Mips/MipsRelocations.h
+++ b/lib/Target/Mips/MipsRelocations.h
@@ -1,16 +1,16 @@
-//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// This file defines the Mips target-specific relocation types
// (for relocation-model=static).
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
#ifndef MIPSRELOCATIONS_H_
#define MIPSRELOCATIONS_H_
@@ -20,10 +20,10 @@
namespace llvm {
namespace Mips{
enum RelocationType {
- // reloc_mips_branch - pc relative relocation for branches. The lower 18
+ // reloc_mips_pc16 - pc relative relocation for branches. The lower 18
// bits of the difference between the branch target and the branch
// instruction, shifted right by 2.
- reloc_mips_branch = 1,
+ reloc_mips_pc16 = 1,
// reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
// lower 16 bits of the address is negative).
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 00be8ee94431..1add02ff83e9 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
+//===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 016d449a1067..00347df9ac84 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "MipsSubtarget.h"
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -21,17 +22,19 @@
using namespace llvm;
+void MipsSubtarget::anchor() { }
+
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little) :
MipsGenSubtargetInfo(TT, CPU, FS),
- MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
HasMinMax(false), HasSwap(false), HasBitCount(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips32r1";
+ CPUName = "mips32";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -41,7 +44,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Set MipsABI if it hasn't been set yet.
if (MipsABI == UnknownABI)
- MipsABI = hasMips64() ? N64 : O32;
+ MipsABI = hasMips64() ? N64 : O32;
// Check if Architecture and ABI are compatible.
assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
@@ -52,3 +55,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
if (TT.find("linux") == std::string::npos)
IsLinux = false;
}
+
+bool
+MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
+ return OptLevel >= CodeGenOpt::Aggressive;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index d9dddad23a48..7faf77baa650 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
class StringRef;
class MipsSubtarget : public MipsGenSubtargetInfo {
+ virtual void anchor();
public:
// NOTE: O64 will not be supported.
@@ -88,6 +89,9 @@ protected:
InstrItineraryData InstrItins;
public:
+ virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
/// Only O32 and EABI supported right now.
bool isABI_EABI() const { return MipsABI == EABI; }
@@ -111,6 +115,8 @@ public:
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+ bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
+
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 6480da3e6dfc..ad022311ed7d 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
#include "MipsTargetMachine.h"
+#include "Mips.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -34,86 +35,119 @@ extern "C" void LLVMInitializeMipsTarget() {
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- (Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- (Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ CodeGenOpt::Level OL,
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
+void MipsebTargetMachine::anchor() { }
+
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void MipselTargetMachine::anchor() { }
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+void Mips64ebTargetMachine::anchor() { }
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void Mips64elTargetMachine::anchor() { }
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+namespace {
+/// Mips Code Generator Pass Configuration Options.
+class MipsPassConfig : public TargetPassConfig {
+public:
+ MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MipsTargetMachine &getMipsTargetMachine() const {
+ return getTM<MipsTargetMachine>();
+ }
+
+ const MipsSubtarget &getMipsSubtarget() const {
+ return *getMipsTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MipsPassConfig(this, PM);
+}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
-bool MipsTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+bool MipsPassConfig::addInstSelector()
{
- PM.add(createMipsISelDag(*this));
+ PM.add(createMipsISelDag(getMipsTargetMachine()));
return false;
}
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
-bool MipsTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+bool MipsPassConfig::addPreEmitPass()
{
- PM.add(createMipsDelaySlotFillerPass(*this));
+ PM.add(createMipsDelaySlotFillerPass(getMipsTargetMachine()));
return true;
}
-bool MipsTargetMachine::
-addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MipsPassConfig::addPreRegAlloc() {
// Do not restore $gp if target is Mips64.
// In N32/64, $gp is a callee-saved register.
- if (!Subtarget.hasMips64())
- PM.add(createMipsEmitGPRestorePass(*this));
+ if (!getMipsSubtarget().hasMips64())
+ PM.add(createMipsEmitGPRestorePass(getMipsTargetMachine()));
return true;
}
-bool MipsTargetMachine::
-addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
- PM.add(createMipsExpandPseudoPass(*this));
+bool MipsPassConfig::addPreSched2() {
+ PM.add(createMipsExpandPseudoPass(getMipsTargetMachine()));
return true;
}
bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
// Machine code emitter pass for Mips.
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
}
-
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 118ed107c514..80c00e80f12c 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MipsTargetMachine.h - Define TargetMachine for Mips -00--*- C++ -*-===//
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,15 +14,15 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsSubtarget.h"
+#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
-#include "MipsFrameLowering.h"
+#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,8 +38,9 @@ namespace llvm {
public:
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
@@ -67,15 +68,8 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
@@ -83,37 +77,47 @@ namespace llvm {
/// MipsebTargetMachine - Mips32 big endian target machine.
///
class MipsebTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// MipselTargetMachine - Mips32 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64ebTargetMachine - Mips64 big endian target machine.
///
class Mips64ebTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64elTargetMachine - Mips64 little endian target machine.
///
class Mips64elTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 05c46f5c97a5..04dc60aa6b45 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 5692604504a8..4172d00a33f0 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..2d425686227f
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/TargetInfo/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsInfo
+parent = Mips
+required_libraries = MC Support Target
+add_to_library_groups = Mips
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 6e87b171d896..a3be342f77fb 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -1,10 +1,10 @@
set(LLVM_TARGET_DEFINITIONS PTX.td)
-llvm_tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(PTXGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(PTXGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(PTXCommonTableGen)
add_llvm_target(PTXCodeGen
@@ -17,6 +17,7 @@ add_llvm_target(PTXCodeGen
PTXMCAsmStreamer.cpp
PTXMCInstLower.cpp
PTXMFInfoExtract.cpp
+ PTXMachineFunctionInfo.cpp
PTXParamManager.cpp
PTXRegAlloc.cpp
PTXRegisterInfo.cpp
@@ -25,19 +26,6 @@ add_llvm_target(PTXCodeGen
PTXTargetMachine.cpp
)
-add_llvm_library_dependencies(LLVMPTXCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPTXDesc
- LLVMPTXInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
index 029d06031dd2..b25289347ba5 100644
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-add_llvm_library_dependencies(LLVMPTXAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..af5d20029ffc
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXAsmPrinter
+parent = PTX
+required_libraries = MC Support
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
index aabb404dad68..1830213267b8 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -18,27 +18,69 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "PTXGenAsmWriter.inc"
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI) {
+ MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
-StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ // Decode the register number into type and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
}
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -96,9 +138,23 @@ void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
O << "), ";
}
- O << *(MI->getOperand(Index++).getExpr()) << ", (";
-
+ const MCExpr* Expr = MI->getOperand(Index++).getExpr();
unsigned NumArgs = MI->getOperand(Index++).getImm();
+
+ // if the function call is to printf or puts, change to vprintf
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ if (Sym.getName() == "printf" || Sym.getName() == "puts") {
+ O << "vprintf";
+ } else {
+ O << Sym.getName();
+ }
+ } else {
+ O << *Expr;
+ }
+
+ O << ", (";
+
if (NumArgs > 0) {
printOperand(MI, Index++, O);
for (unsigned i = 1; i < NumArgs; ++i) {
@@ -125,6 +181,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "0000000000000000";
}
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -156,7 +214,6 @@ void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
llvm_unreachable("Unknown rounding mode!");
case PTXRoundingMode::RndDefault:
llvm_unreachable("FP rounding-mode pass did not handle instruction!");
- break;
case PTXRoundingMode::RndNone:
// Do not print anything.
break;
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
index 86dfd482885b..ea4d50477d70 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,12 @@ class MCOperand;
class PTXInstPrinter : public MCInstPrinter {
public:
- PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- static const char *getInstructionName(unsigned Opcode);
-
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt
new file mode 100644
index 000000000000..15a1eb532837
--- /dev/null
+++ b/lib/Target/PTX/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = PTX
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = PTXCodeGen
+parent = PTX
+required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index 811ef4bd1fcf..d1fd74c369b9 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc
PTXMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXDesc
- LLVMMC
- LLVMPTXInfo
- LLVMPTXAsmPrinter
- LLVMSupport
- )
-
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..19b80c5ce9e3
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXDesc
+parent = PTX
+required_libraries = MC PTXAsmPrinter PTXInfo Support
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index c6094be4d15b..a3e0f320fcb5 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -18,6 +18,8 @@
#define PTXBASEINFO_H
#include "PTXMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace PTXStateSpace {
@@ -57,6 +59,75 @@ namespace llvm {
RndPosInfInt = 10 // .rpi
};
} // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
} // namespace llvm
#endif
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
index efefead5341d..cdfbc8046246 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+void PTXMCAsmInfo::anchor() { }
+
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::ptx64)
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
index 03f5d66b3d60..32ca0696950e 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,7 +20,9 @@ namespace llvm {
class Target;
class StringRef;
- struct PTXMCAsmInfo : public MCAsmInfo {
+ class PTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
};
} // namespace llvm
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index a5af3b880135..08fb970fc290 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,18 +52,21 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
assert(SyntaxVariant == 0 && "We only have one syntax variant");
- return new PTXInstPrinter(MAI, STI);
+ return new PTXInstPrinter(MAI, MII, MRI, STI);
}
extern "C" void LLVMInitializePTXTargetMC() {
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
index 1003b0b5ece9..542638ace135 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -15,9 +15,7 @@
#define PTXMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target ThePTX32Target;
extern Target ThePTX64Target;
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 7d46cce4aeca..ffb92cb89e39 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -1,4 +1,3 @@
-//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 693bb9c48344..994a68ed207a 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -1,4 +1,4 @@
-//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 733744bbd08b..0b6ac7bcb588 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ptx-asm-printer"
-#include "PTX.h"
#include "PTXAsmPrinter.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
@@ -25,7 +25,6 @@
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -52,23 +51,23 @@ using namespace llvm;
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo,
- const MachineRegisterInfo& MRI) {
- const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
-
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass == TRC) return # clsstr;
-
- TEST_REGCLS(RegPred, pred);
- TEST_REGCLS(RegI16, b16);
- TEST_REGCLS(RegI32, b32);
- TEST_REGCLS(RegI64, b64);
- TEST_REGCLS(RegF32, b32);
- TEST_REGCLS(RegF64, b64);
-#undef TEST_REGCLS
-
- llvm_unreachable("Not in any register class!");
- return NULL;
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
}
static const char *getStateSpaceName(unsigned addressSpace) {
@@ -80,7 +79,6 @@ static const char *getStateSpaceName(unsigned addressSpace) {
case PTXStateSpace::Parameter: return "param";
case PTXStateSpace::Shared: return "shared";
}
- return NULL;
}
static const char *getTypeName(Type* type) {
@@ -139,15 +137,15 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
// Emit the PTX .version and .target attributes
- OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
- OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString());
+ OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() +
(ST.supportsDouble() ? ""
- : ", map_f64_to_f32")));
+ : ", map_f64_to_f32"));
// .address_size directive is optional, but it must immediately follow
// the .target directive if present within a module
if (ST.supportsPTX23()) {
- std::string addrSize = ST.is64Bit() ? "64" : "32";
- OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ const char *addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize);
}
OutStreamer.AddBlankLine();
@@ -166,6 +164,11 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
OutStreamer.AddBlankLine();
+ // declare external functions
+ for (Module::const_iterator i = M.begin(), e = M.end();
+ i != e; ++i)
+ EmitFunctionDeclaration(i);
+
// declare global variables
for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
i != e; ++i)
@@ -179,68 +182,47 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
const PTXParamManager &PM = MFI->getParamManager();
// Print register definitions
- std::string regDefs;
+ SmallString<128> regDefs;
+ raw_svector_ostream os(regDefs);
unsigned numRegs;
// pred
- numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .pred %p<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
- numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b16 %rh<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
- numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b32 %r<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
- numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b64 %rd<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
- numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .f32 %f<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
- numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .f64 %fd<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
// Local params
for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
- i != e; ++i) {
- regDefs += "\t.param .b";
- regDefs += utostr(PM.getParamSize(*i));
- regDefs += " ";
- regDefs += PM.getParamName(*i);
- regDefs += ";\n";
- }
+ i != e; ++i)
+ os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
+ << ";\n";
- OutStreamer.EmitRawText(Twine(regDefs));
+ OutStreamer.EmitRawText(os.str());
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
@@ -249,16 +231,13 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
- std::string def = "\t.local .align ";
- def += utostr(FrameInfo->getObjectAlignment(i));
- def += " .b8";
- def += " __local";
- def += utostr(i);
- def += "[";
- def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits
- def += "]";
- def += ";";
- OutStreamer.EmitRawText(Twine(def));
+ OutStreamer.EmitRawText("\t.local .align " +
+ Twine(FrameInfo->getObjectAlignment(i)) +
+ " .b8 __local" +
+ Twine(i) +
+ "[" +
+ Twine(FrameInfo->getObjectSize(i)) +
+ "];");
}
}
@@ -295,36 +274,27 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
- std::string decl;
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
// check if it is defined in some other translation unit
if (gv->isDeclaration())
- decl += ".extern ";
+ os << ".extern ";
// state space: e.g., .global
- decl += ".";
- decl += getStateSpaceName(gv->getType()->getAddressSpace());
- decl += " ";
+ os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';
// alignment (optional)
unsigned alignment = gv->getAlignment();
- if (alignment != 0) {
- decl += ".align ";
- decl += utostr(gv->getAlignment());
- decl += " ";
- }
+ if (alignment != 0)
+ os << ".align " << gv->getAlignment() << ' ';
if (PointerType::classof(gv->getType())) {
PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
Type* elementTy = pointerTy->getElementType();
- decl += ".b8 ";
- decl += gvsym->getName();
- decl += "[";
-
- if (elementTy->isArrayTy())
- {
+ if (elementTy->isArrayTy()) {
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
@@ -333,7 +303,6 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
unsigned numElements = arrayTy->getNumElements();
while (elementTy->isArrayTy()) {
-
arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
@@ -342,110 +311,91 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
// FIXME: isPrimitiveType() == false for i16?
assert(elementTy->isSingleValueType() &&
- "Non-primitive types are not handled");
+ "Non-primitive types are not handled");
- // Compute the size of the array, in bytes.
- uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
- * numElements;
+ // Find the size of the element in bits
+ unsigned elementSize = elementTy->getPrimitiveSizeInBits();
- decl += utostr(arraySize);
+ os << ".b" << elementSize << ' ' << gvsym->getName()
+ << '[' << numElements << ']';
+ } else {
+ os << ".b8" << gvsym->getName() << "[]";
}
- decl += "]";
-
// handle string constants (assume ConstantArray means string)
-
- if (gv->hasInitializer())
- {
+ if (gv->hasInitializer()) {
const Constant *C = gv->getInitializer();
- if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
- {
- decl += " = {";
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ os << " = {";
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- {
- if (i > 0) decl += ",";
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ if (i > 0)
+ os << ',';
- decl += "0x" +
- utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ os << "0x";
+ os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
}
- decl += "}";
+ os << '}';
}
}
- }
- else {
+ } else {
// Note: this is currently the fall-through case and most likely generates
// incorrect code.
- decl += getTypeName(gv->getType());
- decl += " ";
-
- decl += gvsym->getName();
+ os << getTypeName(gv->getType()) << ' ' << gvsym->getName();
- if (ArrayType::classof(gv->getType()) ||
- PointerType::classof(gv->getType()))
- decl += "[]";
+ if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
+ os << "[]";
}
- decl += ";";
-
- OutStreamer.EmitRawText(Twine(decl));
+ os << ';';
+ OutStreamer.EmitRawText(os.str());
OutStreamer.AddBlankLine();
}
void PTXAsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (!CurrentFnSym->isUndefined()) {
+ if (!CurrentFnSym->isUndefined())
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
- return;
- }
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
- const MachineRegisterInfo& MRI = MF->getRegInfo();
- std::string decl = isKernel ? ".entry" : ".func";
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
+ os << (isKernel ? ".entry" : ".func");
if (!isKernel) {
- decl += " (";
+ os << " (";
if (ST.useParamSpaceForDeviceArgs()) {
for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
b = i; i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".param .b";
- decl += utostr(PM.getParamSize(*i));
- decl += " ";
- decl += PM.getParamName(*i);
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- decl += ".reg .";
- decl += getRegisterTypeName(*i, MRI);
- decl += " ";
- decl += MFI->getRegisterName(*i);
+ if (i != b)
+ os << ", ";
+
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
}
}
- decl += ")";
+ os << ')';
}
// Print function name
- decl += " ";
- decl += CurrentFnSym->getName().str();
-
- decl += " (";
+ os << ' ' << CurrentFnSym->getName() << " (";
const Function *F = MF->getFunction();
@@ -453,63 +403,80 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (isKernel || ST.useParamSpaceForDeviceArgs()) {
/*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
b = i; i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".param .b";
- decl += utostr(PM.getParamSize(*i));
- decl += " ";
- decl += PM.getParamName(*i);
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
}*/
int Counter = 1;
for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
b = i; i != e; ++i) {
if (i != b)
- decl += ", ";
+ os << ", ";
const Type *ArgType = (*i).getType();
- decl += ".param .b";
+ os << ".param .b";
if (ArgType->isPointerTy()) {
if (ST.is64Bit())
- decl += "64";
+ os << "64";
else
- decl += "32";
+ os << "32";
} else {
- decl += utostr(ArgType->getPrimitiveSizeInBits());
+ os << ArgType->getPrimitiveSizeInBits();
}
if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
- decl += " .ptr";
+ os << " .ptr";
switch (PtrType->getAddressSpace()) {
default:
llvm_unreachable("Unknown address space in argument");
case PTXStateSpace::Global:
- decl += " .global";
+ os << " .global";
break;
case PTXStateSpace::Shared:
- decl += " .shared";
+ os << " .shared";
break;
}
}
- decl += " __param_";
- decl += utostr(Counter++);
+ os << " __param_" << Counter++;
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".reg .";
- decl += getRegisterTypeName(*i, MRI);
- decl += " ";
- decl += MFI->getRegisterName(*i);
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
}
}
- decl += ")";
+ os << ')';
+ OutStreamer.EmitRawText(os.str());
+}
+
+void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ std::string decl = "";
+
+ // hard-coded emission of extern vprintf function
+
+ if (func->getName() == "printf" || func->getName() == "puts") {
+ decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_2, .param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_3)\n";
+ }
+
OutStreamer.EmitRawText(Twine(decl));
}
@@ -535,7 +502,7 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
Entry.setValue(SrcId);
// Print out a .file directive to specify files for .loc directives.
- OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+ OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey());
return SrcId;
}
@@ -550,20 +517,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
MCOperand MCOp;
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const MCExpr *Expr;
- const char *RegSymbolName;
+ unsigned EncodedReg;
switch (MO.getType()) {
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register:
- // We create register operands as symbols, since the PTXInstPrinter class
- // has no way to map virtual registers back to a name without some ugly
- // hacks.
- // FIXME: Figure out a better way to handle virtual register naming.
- RegSymbolName = MFI->getRegisterName(MO.getReg());
- Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
- OutContext);
- MCOp = MCOperand::CreateExpr(Expr);
+ if (MO.getReg() > 0) {
+ // Encode the register
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
@@ -594,4 +559,3 @@ extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
-
diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h
index 538c0802a27e..74c8d58a3e9b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.h
+++ b/lib/Target/PTX/PTXAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -47,7 +47,7 @@ public:
private:
void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration();
+ void EmitFunctionDeclaration(const Function* func);
StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
index 0b653e04b3bb..a21d1728d81c 100644
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -23,9 +23,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXFPRoundingModePass should be executed just before emission.
-namespace llvm {
+namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
@@ -58,7 +60,7 @@ namespace llvm {
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp
index b621b9d634d2..e6e268e480f8 100644
--- a/lib/Target/PTX/PTXFrameLowering.cpp
+++ b/lib/Target/PTX/PTXFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
index 9320676150df..831e81803df6 100644
--- a/lib/Target/PTX/PTXFrameLowering.h
+++ b/lib/Target/PTX/PTXFrameLowering.h
@@ -1,4 +1,4 @@
-//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 3307d91a6188..ef4455b96bc3 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXISelLowering.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
@@ -20,6 +20,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -46,6 +47,11 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
+ // Let LLVM use loads/stores for all mem* operations
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
@@ -91,7 +97,8 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
// customise setcc to use bitwise logic if possible
- setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ //setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::i1, Legal);
// customize translation of memory addresses
@@ -150,18 +157,27 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ //if (Op1.getOpcode() == ISD::Constant &&
+ // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ // cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ // (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ //
+ // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ //}
- return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
- }
+ //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1);
+ //if(COp1 && COp1->getZExtValue() == 1) {
+ // if(CC == ISD::SETNE) {
+ // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0);
+ // }
+ //}
+
+ llvm_unreachable("setcc was not matched by a pattern!");
return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
}
@@ -205,7 +221,6 @@ SDValue PTXTargetLowering::
switch (CallConv) {
default:
llvm_unreachable("Unsupported calling convention");
- break;
case CallingConv::PTX_Kernel:
MFI->setKernel(true);
break;
@@ -235,8 +250,25 @@ SDValue PTXTargetLowering::
}
else {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT RegVT = Ins[i].VT;
- TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ EVT RegVT = Ins[i].VT;
+ const TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1)
+ RegType = PTXRegisterType::Pred;
+ else if (RegVT == MVT::i16)
+ RegType = PTXRegisterType::B16;
+ else if (RegVT == MVT::i32)
+ RegType = PTXRegisterType::B32;
+ else if (RegVT == MVT::i64)
+ RegType = PTXRegisterType::B64;
+ else if (RegVT == MVT::f32)
+ RegType = PTXRegisterType::F32;
+ else if (RegVT == MVT::f64)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unknown parameter type");
// Use a unique index in the instruction to prevent instruction folding.
// Yes, this is a hack.
@@ -247,7 +279,7 @@ SDValue PTXTargetLowering::
InVals.push_back(ArgValue);
- MFI->addArgReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
}
}
@@ -297,26 +329,33 @@ SDValue PTXTargetLowering::
} else {
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT RegVT = Outs[i].VT;
- TargetRegisterClass* TRC = 0;
+ const TargetRegisterClass* TRC;
+ unsigned RegType;
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
}
else {
llvm_unreachable("Unknown parameter type");
@@ -329,7 +368,7 @@ SDValue PTXTargetLowering::
Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
}
}
@@ -344,7 +383,7 @@ SDValue PTXTargetLowering::
SDValue
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -352,38 +391,99 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction& MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- PTXParamManager &PM = MFI->getParamManager();
+ PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = PTXMFI->getParamManager();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
"Calls are not handled for the target device");
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ const Function *function = cast<Function>(GV);
+
+ // allow non-device calls only for printf
+ bool isPrintf = function->getName() == "printf" || function->getName() == "puts";
+
+ assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
+ "PTX function calls must be to PTX device functions");
+
+ unsigned outSize = isPrintf ? 2 : Outs.size();
+
std::vector<SDValue> Ops;
// The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
- Ops.resize(Outs.size() + Ins.size() + 4);
+ Ops.resize(outSize + Ins.size() + 4);
Ops[0] = Chain;
// Identify the callee function
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
- "PTX function calls must be to PTX device functions");
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
Ops[Ins.size()+2] = Callee;
- // Generate STORE_PARAM nodes for each function argument. In PTX, function
- // arguments are explicitly stored into .param variables and passed as
- // arguments. There is no register/stack-based calling convention in PTX.
- Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
- for (unsigned i = 0; i != OutVals.size(); ++i) {
- unsigned Size = OutVals[i].getValueType().getSizeInBits();
- unsigned Param = PM.addLocalParam(Size);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
+ // #Outs
+ Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
+
+ if (isPrintf) {
+ // first argument is the address of the global string variable in memory
+ unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
+ SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
+ MVT::Other);
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
- ParamValue, OutVals[i]);
- Ops[i+Ins.size()+4] = ParamValue;
+ ParamValue0, OutVals[0]);
+ Ops[Ins.size()+4] = ParamValue0;
+
+ // alignment is the maximum size of all the arguments
+ unsigned alignment = 0;
+ for (unsigned i = 1; i < OutVals.size(); ++i) {
+ alignment = std::max(alignment,
+ OutVals[i].getValueType().getSizeInBits());
+ }
+
+ // size is the alignment multiplied by the number of arguments
+ unsigned size = alignment * (OutVals.size() - 1);
+
+ // second argument is the address of the stack object (unless no arguments)
+ unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
+ SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
+ MVT::Other);
+ Ops[Ins.size()+5] = ParamValue1;
+
+ if (size > 0)
+ {
+ // create a local stack object to store the arguments
+ unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
+ SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
+
+ // store each of the arguments to the stack in turn
+ for (unsigned int i = 1; i != OutVals.size(); i++) {
+ SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex,
+                                 DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
+ Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ // copy the address of the local frame index to get the address in non-local space
+ SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
+
+ // store this address in the second argument
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
+ }
+ }
+ else
+ {
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
}
std::vector<SDValue> InParams;
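For reference, a minimal C++ sketch (not part of the patch; names are illustrative) of how the printf path above sizes the local buffer that holds the variadic arguments: the alignment is the largest argument size, the total size is that alignment times the number of variadic arguments, and each argument is then stored at an 8-byte stride from the frame index.

#include <algorithm>
#include <utility>
#include <vector>

// One entry per outgoing value; only the size in bits matters here.
struct OutVal { unsigned SizeInBits; };

// Mirrors the printf-path computation in LowerCall above. Element 0 is the
// format string and is excluded from the buffer.
static std::pair<unsigned, unsigned>   // {size in bytes, alignment in bytes}
printfVarArgBuffer(const std::vector<OutVal> &OutVals) {
  if (OutVals.size() <= 1)
    return std::make_pair(0u, 0u);
  unsigned AlignBits = 0;
  for (unsigned i = 1; i < OutVals.size(); ++i)
    AlignBits = std::max(AlignBits, OutVals[i].SizeInBits);
  unsigned SizeBits = AlignBits * (OutVals.size() - 1);
  return std::make_pair(SizeBits / 8, AlignBits / 8);
}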
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index 4d2566540af2..33220f4dc346 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -1,4 +1,4 @@
-//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,8 +18,6 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
-class PTXSubtarget;
-class PTXTargetMachine;
namespace PTXISD {
enum NodeType {
@@ -64,9 +62,8 @@ class PTXTargetLowering : public TargetLowering {
SelectionDAG &DAG) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
index 397fdc319a84..267e8341293a 100644
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -1,4 +1,4 @@
-//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 1b947a5400f4..443cd54906c2 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "ptx-instrinfo"
-#include "PTX.h"
#include "PTXInstrInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+ return !isPredicated(MI) && MI->isTerminator();
}
bool PTXInstrInfo::
@@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
if (MBB.empty())
return true;
- MachineBasicBlock::const_iterator iter = MBB.end();
+ MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const MCInstrDesc &desc1 = instLast1.getDesc();
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
// we put a dummy value in instLast2 since it is used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
}
// this block ends with only an unconditional branch
- if (desc1.isUnconditionalBranch() &&
+ if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
@@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch and
// it falls through to a successor block
- if (desc1.isConditionalBranch() &&
+ if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
@@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch
// followed by an unconditional branch
- if (desc2.isConditionalBranch() &&
- desc1.isUnconditionalBranch()) {
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
@@ -302,7 +300,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "storeRegToStackSlot should not be called for PTX");
+ llvm_unreachable("storeRegToStackSlot should not be called for PTX");
}
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -310,7 +308,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "loadRegFromStackSlot should not be called for PTX");
+ llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
@@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const MCInstrDesc &desc = inst.getDesc();
- return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
}
bool PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index 871f1ac8d376..fba89c09394c 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index a3fcea9038cf..bead4286dfd8 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -680,6 +680,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVaddr64
: InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
[(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVframe32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>;
+ def MOVframe64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>;
}
// PTX cvt instructions
@@ -802,6 +808,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isBranch = 1, isTerminator = 1 in {
// FIXME: The pattern part is blank because I cannot (or do not yet know
// how to) use the first operand of PredicateOperand (a RegPred register) here
+ // When this is revisited, make sure to also look at LowerSETCC and try to
+ // fold it into negated predicates, if possible.
def BRAdp
: InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
[/*(brcond pred:$_p, bb:$d)*/]>;
@@ -819,17 +827,17 @@ let hasSideEffects = 1 in {
///===- Parameter Passing Pseudo-Instructions -----------------------------===//
def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %param$b", []>;
+ "mov.pred\t$a, %arg$b", []>;
def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %param$b", []>;
+ "mov.b16\t$a, %arg$b", []>;
def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %param$b", []>;
+ "mov.b32\t$a, %arg$b", []>;
def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %param$b", []>;
+ "mov.b64\t$a, %arg$b", []>;
def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %param$b", []>;
+ "mov.f32\t$a, %arg$b", []>;
def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %param$b", []>;
+ "mov.f64\t$a, %arg$b", []>;
def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
@@ -885,19 +893,26 @@ def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
// FMUL+FADD
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
- (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
// FNEG
def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
@@ -1004,6 +1019,9 @@ def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
+// setcc - predicate inversion for branch conditions
+def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)),
+ (XORripreds RegPred:$a, imm:$b)>;
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
index 9b4f56cf25c6..7a62684b91b9 100644
--- a/lib/Target/PTX/PTXInstrLoadStore.td
+++ b/lib/Target/PTX/PTXInstrLoadStore.td
@@ -1,4 +1,4 @@
-//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 9de1cb62719a..3416f1cca96d 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -1,4 +1,4 @@
-//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 468ce9301de4..3ed67a6a9b47 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -1,4 +1,4 @@
-//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,6 +22,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -161,11 +162,12 @@ public:
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
virtual void EmitInstruction(const MCInst &Inst);
@@ -174,7 +176,7 @@ public:
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
@@ -476,8 +478,8 @@ void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
-void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {}
+bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {return false;}
void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
@@ -489,11 +491,20 @@ void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
// FIXME: should we inherit from MCAsmStreamer?
bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Filename){
+ StringRef Directory,
+ StringRef Filename) {
+ if (!Directory.empty()) {
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename);
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ }
+
OS << "\t.file\t" << FileNo << ' ';
PrintQuotedString(Filename, OS);
EmitEOL();
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
}
void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
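For reference, a minimal C++ sketch (not part of the patch; the helper name is illustrative) of the directory handling added to EmitDwarfFileDirective above: an absolute filename ignores the directory, otherwise the directory and filename are joined with sys::path::append before the .file directive is printed.

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/PathV2.h"
using namespace llvm;

// Returns the path that ends up quoted in the ".file" directive.
static SmallString<128> dwarfFilePath(StringRef Directory, StringRef Filename) {
  SmallString<128> Full(Filename.begin(), Filename.end());
  if (!Directory.empty() && !sys::path::is_absolute(Filename)) {
    Full.assign(Directory.begin(), Directory.end());
    sys::path::append(Full, Filename);
  }
  return Full;
}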
@@ -529,12 +540,13 @@ void PTXMCAsmStreamer::EmitRawText(StringRef String) {
EmitEOL();
}
-void PTXMCAsmStreamer::Finish() {}
+void PTXMCAsmStreamer::FinishImpl() {}
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *IP,
MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b33a273dc93d..172a0e031356 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,9 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXMFInfoExtract must run after register allocation!
-namespace llvm {
+namespace {
/// PTXMFInfoExtract - PTX specific code to extract of PTX machine
/// function information for PTXAsmPrinter
///
@@ -42,7 +44,7 @@ namespace llvm {
return "PTX Machine Function Info Extractor";
}
}; // class PTXMFInfoExtract
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
@@ -56,7 +58,22 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- MFI->addVirtualRegister(TRC, Reg);
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unkown register class.");
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
}
return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/PTX/PTXMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..60acfc75a2f7
--- /dev/null
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- PTXMachineFunctionInfo.cpp - PTX machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PTXMachineFunctionInfo::anchor() { }
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 3b985f7dd6b0..bb7574cbcd71 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==//
+//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -30,20 +30,27 @@ namespace llvm {
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
bool IsKernel;
DenseSet<unsigned> RegArgs;
DenseSet<unsigned> RegRets;
- typedef std::vector<unsigned> RegisterList;
- typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
- typedef DenseMap<unsigned, std::string> RegisterNameMap;
typedef DenseMap<int, std::string> FrameMap;
- RegisterMap UsedRegs;
- RegisterNameMap RegNames;
FrameMap FrameSymbols;
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
PTXParamManager ParamManager;
public:
@@ -51,13 +58,7 @@ public:
PTXMachineFunctionInfo(MachineFunction &MF)
: IsKernel(false) {
- UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
- UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
- }
+ }
/// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; }
@@ -78,69 +79,106 @@ public:
reg_iterator retreg_begin() const { return RegRets.begin(); }
reg_iterator retreg_end() const { return RegRets.end(); }
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
/// addRetReg - Adds a register to the set of return-value registers.
void addRetReg(unsigned Reg) {
if (!RegRets.count(Reg)) {
RegRets.insert(Reg);
- std::string name;
- name = "%ret";
- name += utostr(RegRets.size() - 1);
- RegNames[Reg] = name;
}
}
/// addArgReg - Adds a register to the set of function argument registers.
void addArgReg(unsigned Reg) {
RegArgs.insert(Reg);
- std::string name;
- name = "%param";
- name += utostr(RegArgs.size() - 1);
- RegNames[Reg] = name;
- }
-
- /// addVirtualRegister - Adds a virtual register to the set of all used
- /// registers in the function.
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- std::string name;
-
- // Do not count registers that are argument/return registers.
- if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
- UsedRegs[TRC].push_back(Reg);
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
-
- name += utostr(UsedRegs[TRC].size() - 1);
- RegNames[Reg] = name;
- }
}
/// getRegisterName - Returns the name of the specified virtual register. This
/// name is used during PTX emission.
- const char *getRegisterName(unsigned Reg) const {
- if (RegNames.count(Reg))
- return RegNames.find(Reg)->second.c_str();
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
else if (Reg == PTX::NoRegister)
return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
- /// getNumRegistersForClass - Returns the number of virtual registers that are
- /// used for the specified register class.
- unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
- return UsedRegs.lookup(TRC).size();
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
}
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
@@ -148,13 +186,13 @@ public:
if (FrameSymbols.count(FrameIndex)) {
return FrameSymbols.lookup(FrameIndex).c_str();
} else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
// The whole point of caching this name is to ensure the pointer we pass
// to any getExternalSymbol() calls will remain valid for the lifetime of
// the back-end instance. This is to work around an issue in SelectionDAG
// where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
+ FrameSymbols[FrameIndex] = Name;
return FrameSymbols[FrameIndex].c_str();
}
}
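For reference, a minimal C++ sketch (not part of the patch) of the encoding produced by addRegister()/getEncodedRegister() above: the register space occupies the low three bits, the register type the next three, and the per-space offset the remaining bits. Treating the type and space fields as exactly three bits wide is an assumption; it holds for the six PTXRegisterType values and the register spaces used here.

// Pack offset/type/space the same way addRegister() does.
static unsigned encodeRegister(unsigned Offset, unsigned Type, unsigned Space) {
  return (Offset << 6) | (Type << 3) | Space;
}

// Recover the three fields, assuming 3-bit type and space fields.
static void decodeRegister(unsigned Encoded,
                           unsigned &Offset, unsigned &Type, unsigned &Space) {
  Space  = Encoded & 0x7;
  Type   = (Encoded >> 3) & 0x7;
  Offset = Encoded >> 6;
}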
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
index 7753787ebc51..cc1cc711c82e 100644
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -1,4 +1,4 @@
-//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXParamManager.h"
+#include "PTX.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
index 9fd2de52f7f2..92e7728b4f8b 100644
--- a/lib/Target/PTX/PTXParamManager.h
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -1,4 +1,4 @@
-//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include <string>
namespace llvm {
diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp
index 2d2d5c30c8ca..7fd53752bf66 100644
--- a/lib/Target/PTX/PTXRegAlloc.cpp
+++ b/lib/Target/PTX/PTXRegAlloc.cpp
@@ -24,10 +24,7 @@ namespace {
class PTXRegAlloc : public MachineFunctionPass {
public:
static char ID;
- PTXRegAlloc() : MachineFunctionPass(ID) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ PTXRegAlloc() : MachineFunctionPass(ID) {}
virtual const char* getPassName() const {
return "PTX Register Allocator";
@@ -35,8 +32,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index c8062664a93b..b6ffd38232e3 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -31,44 +31,8 @@ PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
: PTXGenRegisterInfo(0), TII(tii) {
}
-void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
- RegScavenger *RS) const {
- unsigned Index;
- MachineInstr &MI = *II;
- //MachineBasicBlock &MBB = *MI.getParent();
- //DebugLoc dl = MI.getDebugLoc();
- //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-
- //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
-
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
+ int /*SPAdj*/,
+ RegScavenger * /*RS*/) const {
llvm_unreachable("FrameIndex should have been previously eliminated!");
-
- Index = 0;
- while (!MI.getOperand(Index).isFI()) {
- ++Index;
- assert(Index < MI.getNumOperands() &&
- "Instr does not have a FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(Index).getIndex();
-
- DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
- DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
- DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
-
- //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
- //.addReg(Reg, RegState::Define).addImm(FrameIndex);
- //if (MI2->findFirstPredOperandIdx() == -1) {
- // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
- //}
- //MI2->dump();
-
- //MachineOperand ESOp = MachineOperand::CreateES("__local__");
-
- // This frame index is post stack slot re-use assignments
- //MI.getOperand(Index).ChangeToRegister(Reg, false);
- MI.getOperand(Index).ChangeToImmediate(FrameIndex);
- //MI.getOperand(Index) = ESOp;
}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index 55fafe47bf35..5614ce793b90 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,9 +32,9 @@ public:
PTXRegisterInfo(PTXTargetMachine &TM,
const TargetInstrInfo &tii);
- virtual const unsigned
+ virtual const uint16_t
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
- static const unsigned CalleeSavedRegs[] = { 0 };
+ static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs; // save nothing
}
@@ -49,7 +49,6 @@ public:
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PTX does not have a frame register");
- return 0;
}
}; // struct PTXRegisterInfo
} // namespace llvm
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 6ed6d3fe385f..e8b262e48bde 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -1,5 +1,4 @@
-
-//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
index 50ef14a13d94..a116fabaa53b 100644
--- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp
+++ b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -70,7 +70,7 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getNode(ISD::ADD, dl, PointerType, Src,
DAG.getConstant(SrcOff, PointerType)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, 0);
+ false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -108,7 +108,8 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, PointerType, Src,
DAG.getConstant(SrcOff, PointerType)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff), false, false,
+ false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 1eb57d2f1702..454f64e6bba3 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,6 +22,8 @@
using namespace llvm;
+void PTXSubtarget::anchor() { }
+
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PTXGenSubtargetInfo(TT, CPU, FS),
@@ -57,10 +59,10 @@ std::string PTXSubtarget::getTargetString() const {
std::string PTXSubtarget::getPTXVersionString() const {
switch(PTXVersion) {
- default: llvm_unreachable("Unknown PTX version");
case PTX_VERSION_2_0: return "2.0";
case PTX_VERSION_2_1: return "2.1";
case PTX_VERSION_2_2: return "2.2";
case PTX_VERSION_2_3: return "2.3";
}
+ llvm_unreachable("Invalid PTX version");
}
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index b946d7c11cef..ce93fef02a11 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -1,4 +1,4 @@
-//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,7 @@ namespace llvm {
class StringRef;
class PTXSubtarget : public PTXGenSubtargetInfo {
+ virtual void anchor();
public:
/**
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 449a3d9fc8d4..c55a658dc375 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXTargetMachine.h"
+#include "PTX.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -26,6 +25,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -37,8 +37,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -46,7 +44,7 @@ using namespace llvm;
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
- bool useCFI,
+ bool useCFI, bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *MAB,
@@ -67,29 +65,16 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-
- // Copied from LLVMTargetMachine.cpp
- void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
- }
-
- void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- //if (VerifyMachineCode)
- // PM.add(createMachineVerifierPass(Banner));
- }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -98,276 +83,83 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
TLInfo(*this) {
}
+void PTX32TargetMachine::anchor() { }
+
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
+void PTX64TargetMachine::anchor() { }
+
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
-bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createPTXISelDag(*this, OptLevel));
- return false;
+namespace llvm {
+/// PTX Code Generator Pass Configuration Options.
+class PTXPassConfig : public TargetPassConfig {
+public:
+ PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PTXTargetMachine &getPTXTargetMachine() const {
+ return getTM<PTXTargetMachine>();
+ }
+
+ bool addInstSelector();
+ FunctionPass *createTargetRegisterAllocator(bool);
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ bool addPostRegAlloc();
+ void addMachineLateOptimization();
+ bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
+ PassConfig->disablePass(PrologEpilogCodeInserterID);
+ return PassConfig;
}
-bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // PTXMFInfoExtract must after register allocation!
- //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+bool PTXPassConfig::addInstSelector() {
+ PM.add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
return false;
}
-bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
- // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
-
- // Add common CodeGen passes.
- MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
- return true;
- assert(Context != 0 && "Failed to get MCContext");
-
- if (hasMCSaveTempLabels())
- Context->setAllowTemporaryLabels(false);
-
- const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- OwningPtr<MCStreamer> AsmStreamer;
-
- switch (FileType) {
- default: return true;
- case CGFT_AssemblyFile: {
- MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
-
- // Create a code emitter if asked to show the encoding.
- MCCodeEmitter *MCE = 0;
- MCAsmBackend *MAB = 0;
-
- MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
- true, /* verbose asm */
- hasMCUseLoc(),
- hasMCUseCFI(),
- InstPrinter,
- MCE, MAB,
- false /* show MC encoding */);
- AsmStreamer.reset(S);
- break;
- }
- case CGFT_ObjectFile: {
- llvm_unreachable("Object file emission is not supported with PTX");
- }
- case CGFT_Null:
- // The Null output is intended for use for performance analysis and testing,
- // not real users.
- AsmStreamer.reset(createNullStreamer(*Context));
- break;
- }
-
- // MC Logging
- //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
-
- // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
- if (Printer == 0)
- return true;
-
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.take();
+FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
+ return createPTXRegisterAllocator();
+}
- PM.add(Printer);
+// Modify the optimized compilation path to bypass optimized register allocation.
+void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addFastRegAlloc(RegAllocPass);
+}
- PM.add(createGCInfoDeleter());
+bool PTXPassConfig::addPostRegAlloc() {
+ // PTXMFInfoExtract must run after register allocation!
+ //PM.add(createPTXMFInfoExtract(getPTXTargetMachine()));
return false;
}
-bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify,
- MCContext *&OutContext) {
- // Add standard LLVM codegen passes.
- // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
- // modifications for the PTX target.
-
- // Standard LLVM-Level Passes.
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
-
- // Before running any passes, run the verifier to determine if the input
- // coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
- //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
- }
-
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
- PM.add(createLowerInvokePass(getTargetLowering()));
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
-
- if (OptLevel != CodeGenOpt::None)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
+/// Add passes that optimize machine instructions after register allocation.
+void PTXPassConfig::addMachineLateOptimization() {
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
- PM.add(createStackProtectorPass(getTargetLowering()));
-
- addPreISel(PM, OptLevel);
-
- //PM.add(createPrintFunctionPass("\n\n"
- // "*** Final LLVM Code input to ISel ***\n",
- // &dbgs()));
-
- // All passes which modify the LLVM IR are now complete; run the verifier
- // to ensure that the IR is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Standard Lower-Level Passes.
-
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getRegisterInfo(),
- &getTargetLowering()->getObjFileLowering());
- PM.add(MMI);
- OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
-
- // Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
- return true;
-
- // Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- PM.add(createExpandISelPseudosPass());
-
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
- // Optimize PHIs before DCE: removing dead PHI cycles may make more
- // instructions dead.
- if (OptLevel != CodeGenOpt::None)
- PM.add(createOptimizePHIsPass());
-
- // If the target requests it, assign local variables to stack slots relative
- // to one another and simplify frame index references where possible.
- PM.add(createLocalStackSlotAllocationPass());
-
- if (OptLevel != CodeGenOpt::None) {
- // With optimization, dead code should already be eliminated. However
- // there is one known exception: lowered code for arguments that are only
- // used by tail calls, where the tail calls reuse the incoming stack
- // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass");
-
- PM.add(createMachineLICMPass());
- PM.add(createMachineCSEPass());
- PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
-
- PM.add(createPeepholeOptimizerPass());
- printAndVerify(PM, "After codegen peephole optimization pass");
- }
-
- // Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes");
-
- // Perform register allocation.
- PM.add(createPTXRegisterAllocator());
- printAndVerify(PM, "After Register Allocation");
-
- // Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- PM.add(createStackSlotColoringPass(false));
-
- // FIXME: Post-RA LICM has asserts that fire on virtual registers.
- // Run post-ra machine LICM to hoist reloads / remats.
- //if (!DisablePostRAMachineLICM)
- // PM.add(createMachineLICMPass(false));
-
- printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
- }
-
- // Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PostRegAlloc passes");
-
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
- // Insert prolog/epilog code. Eliminate abstract frame index references...
- PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM, "After PrologEpilogCodeInserter");
-
- // Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
- // Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printNoVerify(PM, "After BranchFolding");
- }
-
- // Tail duplication.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createTailDuplicatePass(false));
- printNoVerify(PM, "After TailDuplicate");
- }
-
- PM.add(createGCMachineCodeAnalysisPass());
-
- //if (PrintGCInfo)
- // PM.add(createGCInfoPrinter(dbgs()));
-
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createCodePlacementOptPass());
- printNoVerify(PM, "After CodePlacementOpt");
- }
-
- if (addPreEmitPass(PM, OptLevel))
- printNoVerify(PM, "After PreEmit passes");
-
- PM.add(createPTXMFInfoExtract(*this, OptLevel));
- PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+}
- return false;
+bool PTXPassConfig::addPreEmitPass() {
+ PM.add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
+ PM.add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
+ return true;
}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 5b7c82b1f4f4..278d1555b00b 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -35,8 +35,9 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -58,22 +59,9 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
-
- // We override this method to supply our own set of codegen passes.
- virtual bool addPassesToEmitFile(PassManagerBase &,
- formatted_raw_ostream &,
- CodeGenFileType,
- CodeGenOpt::Level,
- bool = true);
-
// Emission of machine code through JITCodeEmitter is not supported.
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
@@ -82,32 +70,33 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
- private:
-
- bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
- bool DisableVerify, MCContext *&OutCtx);
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // class PTXTargetMachine
class PTX32TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
public:
PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
public:
PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX64TargetMachine
} // namespace llvm
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 2366e45294f8..d9a5da3a9082 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..2cc30c422f14
--- /dev/null
+++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXInfo
+parent = PTX
+required_libraries = MC Support Target
+add_to_library_groups = PTX
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 73b4aba9f015..bcd8bd291623 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,13 +1,13 @@
set(LLVM_TARGET_DEFINITIONS PPC.td)
-llvm_tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(PPCGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-llvm_tablegen(PPCGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(PPCGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(PPCGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(PPCGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
@@ -21,26 +21,13 @@ add_llvm_target(PowerPCCodeGen
PPCFrameLowering.cpp
PPCJITInfo.cpp
PPCMCInstLower.cpp
+ PPCMachineFunctionInfo.cpp
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCDesc
- LLVMPowerPCInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 1d857e2f48d4..a605cc4b5f27 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..7c691deafccf
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCAsmPrinter
+parent = PowerPC
+required_libraries = MC Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index b6a08354a210..61d23ce06aa1 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,16 +17,12 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "PPCGenAsmWriter.inc"
-StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
@@ -94,7 +90,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
unsigned Code = MI->getOperand(OpNo).getImm();
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- default: assert(0 && "Invalid predicate");
case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
@@ -175,7 +170,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
unsigned CCReg = MI->getOperand(OpNo).getReg();
unsigned RegNo;
switch (CCReg) {
- default: assert(0 && "Unknown CR register");
+ default: llvm_unreachable("Unknown CR register");
case PPC::CR0: RegNo = 0; break;
case PPC::CR1: RegNo = 1; break;
case PPC::CR2: RegNo = 2; break;
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 4ed4b765c1c7..73fd5342a165 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,8 +24,9 @@ class PPCInstPrinter : public MCInstPrinter {
// 0 -> AIX, 1 -> Darwin.
unsigned SyntaxVariant;
public:
- PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
- : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+ PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {}
bool isDarwinSyntax() const {
return SyntaxVariant == 1;
@@ -33,9 +34,6 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
-
- static const char *getInstructionName(unsigned Opcode);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
new file mode 100644
index 000000000000..95fac5471ec7
--- /dev/null
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -0,0 +1,33 @@
+;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = PowerPC
+parent = Target
+has_asmprinter = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = PowerPCCodeGen
+parent = PowerPC
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index c4041db8cf0b..b674883db7de 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -4,13 +4,7 @@ add_llvm_library(LLVMPowerPCDesc
PPCMCAsmInfo.cpp
PPCMCCodeEmitter.cpp
PPCPredicates.cpp
- )
-
-add_llvm_library_dependencies(LLVMPowerPCDesc
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCInfo
- LLVMSupport
+ PPCELFObjectWriter.cpp
)
add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..d3a567d1581d
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCDesc
+parent = PowerPC
+required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9f2fd6d01b8e..48de583afdf1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -57,13 +58,6 @@ public:
MCValue Target, uint64_t &FixedValue) {}
};
-class PPCELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend, bool isLittleEndian)
- : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {}
-};
-
class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
public:
@@ -80,33 +74,42 @@ public:
{ "fixup_ppc_ha16", 16, 16, 0 },
{ "fixup_ppc_lo14", 16, 14, 0 }
};
-
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
-
+
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
-
- bool MayNeedRelaxation(const MCInst &Inst) const {
+
+ bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
}
-
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
// FIXME.
- assert(0 && "RelaxInstruction() unimplemented");
+ llvm_unreachable("relaxInstruction() unimplemented");
}
-
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// FIXME: Zero fill for now. That's not right, but at least will get the
// section size right.
for (uint64_t i = 0; i != Count; ++i)
OW->Write8(0);
return true;
- }
-
+ }
+
unsigned getPointerSize() const {
StringRef Name = TheTarget.getName();
if (Name == "ppc64") return 8;
@@ -122,12 +125,12 @@ namespace {
class DarwinPPCAsmBackend : public PPCAsmBackend {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
-
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
- assert(0 && "UNIMP");
+ llvm_unreachable("UNIMP");
}
-
+
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -137,19 +140,19 @@ namespace {
object::mach::CSPPC_ALL),
OS, /*IsLittleEndian=*/false);
}
-
+
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}
};
class ELFPPCAsmBackend : public PPCAsmBackend {
- Triple::OSType OSType;
+ uint8_t OSABI;
public:
- ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) :
- PPCAsmBackend(T), OSType(OSType) { }
-
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
+ PPCAsmBackend(T), OSABI(OSABI) { }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -162,17 +165,12 @@ namespace {
for (unsigned i = 0; i != 4; ++i)
Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
}
-
+
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createELFObjectWriter(new PPCELFObjectWriter(
- /*Is64Bit=*/is64,
- OSType,
- is64 ? ELF::EM_PPC64 : ELF::EM_PPC,
- /*addend*/ true, /*isLittleEndian*/ false),
- OS, /*IsLittleEndian=*/false);
+ return createPPCELFObjectWriter(OS, is64, OSABI);
}
-
+
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}
@@ -187,5 +185,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
- return new ELFPPCAsmBackend(T, Triple(TT).getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFPPCAsmBackend(T, OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
index 369bbdce11f5..9c975c089ea6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===//
+//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
new file mode 100644
index 000000000000..a19798157bf3
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -0,0 +1,103 @@
+//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+ virtual ~PPCELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+ };
+}
+
+PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*HasRelocationAddend*/ true) {}
+
+PPCELFObjectWriter::~PPCELFObjectWriter() {
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14:
+      Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?
+ break;
+ case PPC::fixup_ppc_ha16:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case PPC::fixup_ppc_lo16:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case PPC::fixup_ppc_lo14:
+ Type = ELF::R_PPC_ADDR14;
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+void PPCELFObjectWriter::
+adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
+ switch ((unsigned)Fixup.getKind()) {
+ case PPC::fixup_ppc_ha16:
+ case PPC::fixup_ppc_lo16:
+ RelocOffset += 2;
+ break;
+ default:
+ break;
+ }
+}
+
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index e9424d8415f6..245b4578bf28 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "PPCMCAsmInfo.h"
using namespace llvm;
+void PPCMCAsmInfoDarwin::anchor() { }
+
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
if (is64Bit)
PointerSize = 8;
@@ -30,6 +32,8 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
SupportsDebugInformation= true; // Debug information.
}
+void PPCLinuxMCAsmInfo::anchor() { }
+
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
if (is64Bit)
PointerSize = 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 96ae6fbba0e4..7b4ed9f14eb6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,11 +18,15 @@
namespace llvm {
- struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit PPCMCAsmInfoDarwin(bool is64Bit);
};
- struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
};
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 262f97c36a93..5a6827ffd8d3 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -57,7 +57,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI,
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index d5c8a9e72c67..6568e82e2bf0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -76,7 +77,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
@@ -86,7 +88,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -106,8 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
- return new PPCInstPrinter(MAI, SyntaxVariant);
+ return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant);
}
extern "C" void LLVMInitializePowerPCTargetMC() {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index e5bf2a9dd92f..b7fa0646288d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,14 +14,18 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
@@ -31,7 +35,11 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
-
+
+/// createPPCELFObjectWriter - Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI);
} // End llvm namespace
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 5dc1863a0b2d..24a7178d1ff9 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,14 +25,11 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
- class formatted_raw_ostream;
class JITCodeEmitter;
- class Target;
class MachineInstr;
class AsmPrinter;
class MCInst;
- class TargetMachine;
-
+
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 2d5d302728f7..c554d39434c8 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -1,10 +1,10 @@
-//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
-//
+//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This is the top level entry point for the PowerPC target.
@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
// CPU Directives //
//===----------------------------------------------------------------------===//
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
@@ -33,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -46,6 +48,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -60,6 +64,8 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
@@ -82,6 +88,10 @@ def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 952845943179..fb7aa71d98d3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
@@ -39,6 +40,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -49,10 +51,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ELF.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/SmallString.h"
-#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;
namespace {
@@ -366,14 +367,21 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
+ case PPC::SYNC:
+    // In Book E, sync is called msync; handle this special case here...
+ if (Subtarget.isBookE()) {
+ OutStreamer.EmitRawText(StringRef("\tmsync"));
+ return;
+ }
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
@@ -385,14 +393,26 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
- // FIXME 64-bit SVR4: Use MCSection here!
- OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
- OutStreamer.EmitRawText(StringRef("\t.align 3"));
+ const MCSection *Current = OutStreamer.getCurrentSection();
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
- OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
- ",.TOC.@tocbase");
- OutStreamer.EmitRawText(StringRef("\t.previous"));
- OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+ OutStreamer.EmitValueToAlignment(8);
+ MCSymbol *Symbol1 =
+ OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
+ Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.SwitchSection(Current);
+
+ MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
+ ".L." + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(RealFnSym);
+ CurrentFnSymForSize = RealFnSym;
}
@@ -402,8 +422,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
if (isPPC64 && !TOC.empty()) {
- // FIXME 64-bit SVR4: Use MCSection here?
- OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
// FIXME: This is nondeterministic!
for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
@@ -421,12 +443,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
"ppc",
+ "ppc440",
"ppc601",
"ppc602",
"ppc603",
"ppc7400",
"ppc750",
"ppc970",
+ "ppcA2",
"ppc64"
};
@@ -435,7 +459,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 475edf309c0c..5f775e16f1ca 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,4 +1,4 @@
-//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 441db94581ae..9883c2e42995 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,10 +1,10 @@
//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the PowerPC 32- and 64-bit
@@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[
CCCustom<"CC_PPC_SVR4_Custom_Dummy">
]>;
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 4a1f1822afdf..252a2d159ec3 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,7 +50,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
MachineRelocation GetRelocation(const MachineOperand &MO,
@@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
if (MO.isReg()) {
// MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return getPPCRegisterNumbering(MO.getReg());
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0b85fea65758..b77a80bbf30d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===//
+//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -38,7 +38,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const unsigned short VRRegNo[] = {
+static const uint16_t VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
@@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
+ if (!I->empty() && I->back().isReturn()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttr(Attribute::Naked))
return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -365,8 +367,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1, RegState::Define)
.addReg(PPC::R0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
@@ -380,8 +382,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1, RegState::Define)
.addReg(PPC::R0);
}
} else { // PPC64.
@@ -398,8 +400,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0)
.addImm(NegFrameSize);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1, RegState::Define)
.addReg(PPC::X0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
@@ -413,8 +415,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1, RegState::Define)
.addReg(PPC::X0);
}
}
@@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
@@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: doesn't detect whether or not we need to spill vXX, which requires
// r0 for now.
- if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (RegInfo->requiresRegisterScavenging(MF))
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
@@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Take into account stack space reserved for tail calls.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
LowerBound = TCSPDelta;
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 20faa71d4148..d708541c6686 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,4 +1,4 @@
-//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index cddc9d858adf..6ed1fb9e6a3c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -22,6 +22,30 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// PowerPC Scoreboard Hazard Recognizer
+void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ // This is a PPC pseudo-instruction.
+ return;
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void PPCScoreboardHazardRecognizer::AdvanceCycle() {
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCScoreboardHazardRecognizer::Reset() {
+ ScoreboardHazardRecognizer::Reset();
+}
+
+//===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer
//
// This models the dispatch group formation of the PPC970 processor. Dispatch
@@ -67,12 +91,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- if ((int)Opcode >= 0) {
- isFirst = isSingle = isCracked = isLoad = isStore = false;
- return PPCII::PPC970_Pseudo;
- }
- Opcode = ~Opcode;
-
const MCInstrDesc &MCID = TII.get(Opcode);
isLoad = MCID.mayLoad();
@@ -89,29 +107,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
bool PPCHazardRecognizer970::
-isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
for (unsigned i = 0, e = NumStores; i != e; ++i) {
// Handle exact and commuted addresses.
- if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
- return true;
- if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
return true;
// Okay, we don't have an exact match; if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
- if (StorePtr2[i] == Ptr2) {
- if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
- if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
- // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
- // to see if the load and store actually overlap.
- int StoreOffs = StoreOffset->getZExtValue();
- int LoadOffs = LoadOffset->getZExtValue();
- if (StoreOffs < LoadOffs) {
- if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
- } else {
- if (int(LoadOffs+LoadSize) > StoreOffs) return true;
- }
- }
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
}
}
return false;
@@ -125,13 +137,17 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
- unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
@@ -168,55 +184,10 @@ getHazardType(SUnit *SU, int Stalls) {
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
- if (isLoad && NumStores) {
- unsigned LoadSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown load!");
- case PPC::LBZ: case PPC::LBZU:
- case PPC::LBZX:
- case PPC::LBZ8: case PPC::LBZU8:
- case PPC::LBZX8:
- case PPC::LVEBX:
- LoadSize = 1;
- break;
- case PPC::LHA: case PPC::LHAU:
- case PPC::LHAX:
- case PPC::LHZ: case PPC::LHZU:
- case PPC::LHZX:
- case PPC::LVEHX:
- case PPC::LHBRX:
- case PPC::LHA8: case PPC::LHAU8:
- case PPC::LHAX8:
- case PPC::LHZ8: case PPC::LHZU8:
- case PPC::LHZX8:
- LoadSize = 2;
- break;
- case PPC::LFS: case PPC::LFSU:
- case PPC::LFSX:
- case PPC::LWZ: case PPC::LWZU:
- case PPC::LWZX:
- case PPC::LWA:
- case PPC::LWAX:
- case PPC::LVEWX:
- case PPC::LWBRX:
- case PPC::LWZ8:
- case PPC::LWZX8:
- LoadSize = 4;
- break;
- case PPC::LFD: case PPC::LFDU:
- case PPC::LFDX:
- case PPC::LD: case PPC::LDU:
- case PPC::LDX:
- LoadSize = 8;
- break;
- case PPC::LVX:
- case PPC::LVXL:
- LoadSize = 16;
- break;
- }
-
- if (isLoadOfStoredAddress(LoadSize,
- Node->getOperand(0), Node->getOperand(1)))
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
return NoopHazard;
}
@@ -224,66 +195,27 @@ getHazardType(SUnit *SU, int Stalls) {
}
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return;
- unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
// Track the address stored to.
- if (isStore) {
- unsigned ThisStoreSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown store instruction!");
- case PPC::STB: case PPC::STB8:
- case PPC::STBU: case PPC::STBU8:
- case PPC::STBX: case PPC::STBX8:
- case PPC::STVEBX:
- ThisStoreSize = 1;
- break;
- case PPC::STH: case PPC::STH8:
- case PPC::STHU: case PPC::STHU8:
- case PPC::STHX: case PPC::STHX8:
- case PPC::STVEHX:
- case PPC::STHBRX:
- ThisStoreSize = 2;
- break;
- case PPC::STFS:
- case PPC::STFSU:
- case PPC::STFSX:
- case PPC::STWX: case PPC::STWX8:
- case PPC::STWUX:
- case PPC::STW: case PPC::STW8:
- case PPC::STWU:
- case PPC::STVEWX:
- case PPC::STFIWX:
- case PPC::STWBRX:
- ThisStoreSize = 4;
- break;
- case PPC::STD_32:
- case PPC::STDX_32:
- case PPC::STD:
- case PPC::STDU:
- case PPC::STFD:
- case PPC::STFDX:
- case PPC::STDX:
- case PPC::STDUX:
- ThisStoreSize = 8;
- break;
- case PPC::STVX:
- case PPC::STVXL:
- ThisStoreSize = 16;
- break;
- }
-
- StoreSize[NumStores] = ThisStoreSize;
- StorePtr1[NumStores] = Node->getOperand(1);
- StorePtr2[NumStores] = Node->getOperand(2);
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
++NumStores;
}
@@ -306,3 +238,8 @@ void PPCHazardRecognizer970::AdvanceCycle() {
if (NumIssued == 5)
EndDispatchGroup();
}
+
+void PPCHazardRecognizer970::Reset() {
+ EndDispatchGroup();
+}
+
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 2f81f0f7c7f1..55b45d01b20e 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -14,12 +14,28 @@
#ifndef PPCHAZRECS_H
#define PPCHAZRECS_H
+#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "PPCInstrInfo.h"
namespace llvm {
+/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for generic PPC processors.
+class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+public:
+ PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void Reset();
+};
+
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
@@ -42,8 +58,9 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
//
// This is null if we haven't seen a store yet. We keep track of both
// operands of the store here, since we support [r+r] and [r+i] addressing.
- SDValue StorePtr1[4], StorePtr2[4];
- unsigned StoreSize[4];
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
unsigned NumStores;
public:
@@ -51,6 +68,7 @@ public:
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void Reset();
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
@@ -63,8 +81,8 @@ private:
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
- bool isLoadOfStoredAddress(unsigned LoadSize,
- SDValue Ptr1, SDValue Ptr2) const;
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6f204cc58636..5a04888dd45b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -18,7 +18,6 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -211,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
@@ -378,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
APInt LKZ, LKO, RKZ, RKO;
- CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
- CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+ CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
@@ -603,7 +602,6 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
case ISD::SETULT: return 0;
case ISD::SETUGT: return 1;
}
- return 0;
}
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
@@ -1067,7 +1065,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
- Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d6b8a9ee93c7..3b24951d1dc9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,26 +16,24 @@
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -104,6 +102,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+  // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -147,9 +152,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
@@ -217,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- // VAARG is custom lowered with the 32-bit SVR4 ABI.
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -333,7 +354,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -366,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -408,6 +434,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
+ setSchedulingPreference(Sched::Hybrid);
+
computeRegisterProperties();
}
@@ -418,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
return 4;
- // FIXME SVR4 TBD
+
+  // 16-byte and wider vectors are passed on a 16-byte boundary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VTy->getBitWidth() >= 128)
+ return 16;
+
+  // The rest is aligned to an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
+ if (PPCSubTarget.isPPC64())
+ return 8;
+
return 4;
}
@@ -447,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
@@ -822,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
DAG.ComputeMaskedBits(N.getOperand(1),
- APInt::getAllOnesValue(N.getOperand(1)
- .getValueSizeInBits()),
RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
@@ -884,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -1000,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
@@ -1223,7 +1251,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// extra load to get the address of the global.
if (MOHiFlag & PPCII::MO_NLP_FLAG)
Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return Ptr;
}
@@ -1319,11 +1347,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// areas
SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
InChain = OverflowArea.getValue(1);
SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
InChain = RegSaveArea.getValue(1);
// select overflow_area if index > 8
@@ -1372,7 +1402,8 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo(),
MVT::i32, false, false, 0);
- return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -1411,8 +1442,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
@@ -1530,7 +1562,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1557,7 +1589,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1581,8 +1613,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const unsigned *GetFPR() {
- static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1663,7 +1695,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
@@ -1681,7 +1714,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Arguments stored in registers.
if (VA.isRegLoc()) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
switch (ValVT.getSimpleVT().SimpleTy) {
@@ -1721,7 +1754,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -1762,13 +1795,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const unsigned GPArgRegs[] = {
+ static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const unsigned FPArgRegs[] = {
+ static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1853,25 +1886,26 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -1882,7 +1916,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -1896,12 +1930,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Flags.isByVal()) {
// ObjSize is the true size, ArgSize rounded up to multiple of regs.
- ObjSize = Flags.getByValSize();
+ unsigned ObjSize = Flags.getByValSize();
unsigned ArgSize =
((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
VecArgOffset += ArgSize;
@@ -2138,7 +2171,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
InVals.push_back(ArgVal);
@@ -2259,9 +2292,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
@@ -2295,7 +2328,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
@@ -2443,7 +2476,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
@@ -2451,7 +2484,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
}
@@ -2748,7 +2781,14 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2776,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
@@ -2787,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -2799,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
- InFlag = Chain.getValue(1);
- } else {
+ needsTOCRestore = true;
+ } else if (CallOpc == PPCISD::CALL_SVR4) {
// Otherwise insert NOP.
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ CallOpc = PPCISD::CALL_NOP_SVR4;
}
}
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
@@ -2820,7 +2866,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
SDValue
PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2864,7 +2910,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -3071,7 +3118,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -3120,17 +3168,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -3138,7 +3186,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -3212,7 +3260,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
@@ -3250,7 +3298,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3259,7 +3308,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3308,7 +3357,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (VR_idx != NumVRs) {
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
@@ -3319,7 +3368,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3483,7 +3532,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
// Load the old link SP.
SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
@@ -3674,7 +3723,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -3718,7 +3767,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
Ops, 4, MVT::i64, MMO);
// Load the value as a double.
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3770,7 +3819,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -4236,8 +4285,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
+ ArrayRef<int> PermMask = SVOp->getMask();
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
@@ -4441,7 +4489,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
false, false, 0);
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -4549,7 +4597,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
}
- return SDValue();
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
@@ -4559,8 +4606,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
- return;
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::VAARG: {
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -5461,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
@@ -5700,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
return (V > -(1 << 16) && V < (1 << 16)-1);
}
-bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
return false;
}
@@ -5729,13 +5774,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5749,7 +5794,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
!MF.getFunction()->hasFnAttr(Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
@@ -5758,7 +5804,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
- FrameAddr, MachinePointerInfo(), false, false, 0);
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
return FrameAddr;
}
@@ -5774,7 +5821,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -5782,7 +5829,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
@@ -5791,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
}
}
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned Directive = PPCSubTarget.getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
+ return Sched::ILP;
+
+ return TargetLowering::getSchedulingPreference(N);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 430e45e80493..18eb07200307 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
namespace PPCISD {
@@ -95,7 +95,9 @@ namespace llvm {
EXTSW_32,
/// CALL - A direct function call.
- CALL_Darwin, CALL_SVR4,
+ /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
/// NOP - Special NOP which follows 64-bit SVR4 calls.
NOP,
@@ -279,6 +281,7 @@ namespace llvm {
bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -293,7 +296,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -353,7 +355,7 @@ namespace llvm {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
- /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -361,7 +363,7 @@ namespace llvm {
/// target-independent logic.
virtual EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, bool MemcpyStrSrc,
+ bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
private:
@@ -437,8 +439,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -472,21 +474,21 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerCall_Darwin(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerCall_SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e88ad378ccd9..7f67a4159dfe 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,10 +1,10 @@
-//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
-//
+//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the PowerPC 64-bit instructions. These patterns are used
@@ -64,13 +64,7 @@ let Defs = [LR8] in
PPC970_Unit_BRU;
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
@@ -90,23 +84,29 @@ let isCall = 1, PPC970_Unit = 7,
// ELF 64 ABI Calls = Darwin ABI Calls
// Used to define BL8_ELF and BLA8_ELF
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
(outs), (ins calltarget:$func, variable_ops),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ let isCodeGenOnly = 1 in
+ def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func\n\tnop", BrB, []>;
+
def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func, variable_ops),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in
+ def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func\n\tnop", BrB,
+ [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
}
- let Uses = [CTR8, RM] in {
+ let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
(outs), (ins variable_ops),
"bctrl", BrB,
@@ -123,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_NOP_ELF tglobaladdr:$dst)>;
+
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
+ (BL8_NOP_ELF texternalsym:$dst)>;
+
def : Pat<(PPCnop),
(NOP)>;
@@ -223,6 +229,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -469,6 +487,12 @@ def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
"rldicr $rA, $rS, $SH, $ME", IntRotateD,
[]>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
} // End FXU Operations.
@@ -500,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStGeneral,
+ "lhau $rD, $disp($rA)", LdStLoad,
[]>, RegConstraint<"$rA = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
@@ -510,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStGeneral,
+ "lbz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStGeneral,
+ "lhz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStGeneral,
+ "lwz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStGeneral,
+ "lbzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStGeneral,
+ "lhzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStGeneral,
+ "lwzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStGeneral,
+ "lbzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStGeneral,
+ "lhzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStGeneral,
+ "lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -557,7 +581,8 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"",
[(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-
+
+let hasSideEffects = 1 in {
let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
@@ -567,6 +592,7 @@ let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
[(PPCtoc_restore)]>, isPPC64;
+}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
@@ -587,24 +613,24 @@ def : Pat<(PPCload xaddr:$src),
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
- "stb $rS, $src", LdStGeneral,
+ "stb $rS, $src", LdStStore,
[(truncstorei8 G8RC:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
- "sth $rS, $src", LdStGeneral,
+ "sth $rS, $src", LdStStore,
[(truncstorei16 G8RC:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
- "stw $rS, $src", LdStGeneral,
+ "stw $rS, $src", LdStStore,
[(truncstorei32 G8RC:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStGeneral,
+ "stbx $rS, $dst", LdStStore,
[(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStGeneral,
+ "sthx $rS, $dst", LdStStore,
[(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStGeneral,
+ "stwx $rS, $dst", LdStStore,
[(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
@@ -621,14 +647,14 @@ let PPC970_Unit = 2 in {
def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stbu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "sthu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 256370fa5f52..6c0f3d3f06e5 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,10 +1,10 @@
-//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
-//
+//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the Altivec extension to the PowerPC instruction set.
@@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStGeneral /*FIXME*/, []>;
+ "dss $STRM", LdStLoad /*FIXME*/, []>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStGeneral /*FIXME*/, []>;
+ "dssall", LdStLoad /*FIXME*/, []>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
- "mfvscr $vD", LdStGeneral,
+ "mfvscr $vD", LdStStore,
[(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
- "mtvscr $vB", LdStGeneral,
+ "mtvscr $vB", LdStLoad,
[(int_ppc_altivec_mtvscr VRRC:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
- "lvebx $vD, $src", LdStGeneral,
+ "lvebx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
- "lvehx $vD, $src", LdStGeneral,
+ "lvehx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
- "lvewx $vD, $src", LdStGeneral,
+ "lvewx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
- "lvx $vD, $src", LdStGeneral,
+ "lvx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
- "lvxl $vD, $src", LdStGeneral,
+ "lvxl $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
- "lvsl $vD, $src", LdStGeneral,
+ "lvsl $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
- "lvsr $vD, $src", LdStGeneral,
+ "lvsr $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvebx $rS, $dst", LdStGeneral,
+ "stvebx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvehx $rS, $dst", LdStGeneral,
+ "stvehx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvewx $rS, $dst", LdStGeneral,
+ "stvewx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvx $rS, $dst", LdStGeneral,
+ "stvx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvxl $rS, $dst", LdStGeneral,
+ "stvxl $rS, $dst", LdStStore,
[(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
}
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 84a15b1ca942..d8e4b2bdf34a 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,10 +1,10 @@
//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
// 1.7.1 I-Form
class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
@@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0;
}
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2bc109c8785a..b45ada9db32a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,8 +34,8 @@
#include "PPCGenInstrInfo.inc"
namespace llvm {
-extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
-extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> DisablePPC32RS;
+extern cl::opt<bool> DisablePPC64RS;
}
using namespace llvm;
@@ -49,13 +49,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *TII = TM->getInstrInfo();
- assert(TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(*TII);
+ unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+ }
+
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -327,6 +346,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
+// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -358,7 +378,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
} else {
// FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // we use X11, which we know cannot be used in the prolog/epilog. This is
// a hack.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
@@ -377,9 +397,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- // FIXME (64-bit): Enable
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -392,11 +411,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
+ bool is64Bit = TM.getSubtargetImpl()->isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ (is64Bit ? PPC::X2 : PPC::R2) :
+ (is64Bit ? PPC::X0 : PPC::R0);
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
+ PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
@@ -404,12 +426,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
+ PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
+ PPC::STW8 : PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
@@ -486,15 +510,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
-void
+bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
@@ -514,8 +537,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
} else {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ PPC::X11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
} else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
@@ -524,28 +547,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg)
+ , FrameIdx));
+ return true;
+ } else {
+      // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
}
-
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
} else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -590,6 +622,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
} else {
llvm_unreachable("Unknown regclass!");
}
+
+ return false;
}
void
@@ -602,14 +636,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -649,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
+ case PPC::BL8_NOP_ELF:
+ case PPC::BLA8_NOP_ELF:
+ return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 90bacc96c87e..7d49aa129e36 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,4 +1,4 @@
-//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC32_INSTRUCTIONINFO_H
-#define POWERPC32_INSTRUCTIONINFO_H
+#ifndef POWERPC_INSTRUCTIONINFO_H
+#define POWERPC_INSTRUCTIONINFO_H
#include "PPC.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
@@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -88,6 +88,9 @@ public:
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f248b5ba8c48..748486c1ca26 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,10 +1,10 @@
-//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
-//
+//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the subset of the 32-bit PowerPC instruction set, as used
@@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -349,10 +352,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
-
+def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
@@ -399,7 +402,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
@@ -431,13 +441,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -456,13 +460,7 @@ let isCall = 1, PPC970_Unit = 7,
}
// SVR4 ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
@@ -547,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
"dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+ (DCBT xoaddr:$dst)>;
+
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -642,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
-def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -651,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStGeneral,
+ "lbz $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
[(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStGeneral,
+ "lhz $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStGeneral,
+ "lwz $rD, $src", LdStLoad,
[(set GPRC:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
@@ -675,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStGeneral,
+ "lbzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStGeneral,
+ "lhau $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStGeneral,
+ "lhzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStGeneral,
+ "lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -710,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStGeneral,
+ "lbzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
[(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStGeneral,
+ "lhzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStGeneral,
+ "lwzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", LdStGeneral,
+ "lhbrx $rD, $src", LdStLoad,
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", LdStGeneral,
+ "lwbrx $rD, $src", LdStLoad,
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
@@ -746,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
- "stb $rS, $src", LdStGeneral,
+ "stb $rS, $src", LdStStore,
[(truncstorei8 GPRC:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
- "sth $rS, $src", LdStGeneral,
+ "sth $rS, $src", LdStStore,
[(truncstorei16 GPRC:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
- "stw $rS, $src", LdStGeneral,
+ "stw $rS, $src", LdStStore,
[(store GPRC:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStUX,
@@ -766,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
let PPC970_Unit = 2 in {
def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stbu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "sthu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stwu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stfsu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stfdu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
@@ -803,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStGeneral,
+ "stbx $rS, $dst", LdStStore,
[(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStGeneral,
+ "sthx $rS, $dst", LdStStore,
[(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStGeneral,
+ "stwx $rS, $dst", LdStStore,
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
let mayStore = 1 in {
def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
- "stwux $rS, $rA, $rB", LdStGeneral,
+ "stwux $rS, $rA, $rB", LdStStore,
[]>;
}
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
- "sthbrx $rS, $dst", LdStGeneral,
+ "sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
- "stwbrx $rS, $dst", LdStGeneral,
+ "stwbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
@@ -1091,7 +1092,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
-def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 4590f0045641..a6528c0d7030 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -291,9 +291,10 @@ void PPC64CompilationCallback() {
}
#endif
-extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+extern "C" {
+static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
@@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
// stack after we restore all regs.
return Target;
}
+}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 47ead59b587d..2f8243a597e6 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -1,4 +1,4 @@
-//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 33af4269a3ad..276edcb69d19 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -140,7 +140,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
switch (MO.getType()) {
default:
MI->dump();
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
@@ -166,6 +166,8 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
isDarwin);
break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..6a0aec842be7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -0,0 +1,15 @@
+//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PPCFunctionInfo::anchor() { }
+
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index e2649c8b380f..24caffa3f0f2 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -21,7 +21,8 @@ namespace llvm {
/// PPCFunctionInfo - This class is derived from MachineFunction private
/// PowerPC target-specific information for each MachineFunction.
class PPCFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
+
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 3164e33faae9..17b836d1ed97 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2e90b7a4086b..ef1357137def 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCRegisterInfo.h"
#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "llvm/CallingConv.h"
@@ -46,15 +46,14 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-// FIXME (64-bit): Eventually enable by default.
namespace llvm {
-cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
+ cl::desc("Disable PPC32 register scavenger"),
cl::Hidden);
-cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
+ cl::desc("Disable PPC64 register scavenger"),
cl::Hidden);
}
@@ -63,8 +62,8 @@ using namespace llvm;
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
- (EnablePPC64RS && Subtarget.isPPC64()));
+ return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
+ (!DisablePPC64RS && Subtarget.isPPC64()));
}
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
@@ -99,122 +98,22 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
return &PPC::GPRCRegClass;
}
-const unsigned*
+const uint16_t*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const unsigned Darwin32_CalleeSavedRegs[] = {
- PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- PPC::LR, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const unsigned SVR4_CalleeSavedRegs[] = {
- PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- 0
- };
- // 64-bit Darwin calling convention.
- static const unsigned Darwin64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- PPC::LR8, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const unsigned SVR4_64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+ CSR_Darwin32_SaveList;
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
- 0
- };
-
+const unsigned*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
- Darwin32_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+ CSR_Darwin32_RegMask;
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -247,9 +146,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!requiresRegisterScavenging(MF))
- Reserved.set(PPC::R0); // FIXME (64-bit): Remove
-
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
@@ -259,7 +155,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // Reserve X2 on Darwin to hack around the problem of save/restore of CR
// when the stack frame is too big to address directly; we need two regs.
// This is a hack.
if (Subtarget.isDarwinABI()) {
@@ -273,6 +169,29 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+unsigned
+PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -280,7 +199,8 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
if (int CalleeAmt = I->getOperand(1).getImm()) {
bool is64Bit = Subtarget.isPPC64();
@@ -295,8 +215,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
DebugLoc dl = MI->getDebugLoc();
if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
- addImm(CalleeAmt);
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
} else {
MachineBasicBlock::iterator MBBI = I;
BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
@@ -304,9 +225,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
.addReg(TmpReg, RegState::Kill)
.addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
- .addReg(StackReg)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
.addReg(TmpReg);
}
}
@@ -403,12 +323,12 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
.addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
else
BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
.addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -424,7 +344,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
} else {
BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
.addReg(Reg, RegState::Kill)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -455,28 +375,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex, int SPAdj,
RegScavenger *RS) const {
// Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
  // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
- BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
.addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
@@ -490,6 +414,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -535,16 +501,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-op SPILL_CR.
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+ if (requiresRegisterScavenging(MF)) {
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
+ }
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
- PPC::R31 : PPC::R1,
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -581,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// clear can be encoded. This is extremely uncommon, because normally you
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
- if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -590,19 +564,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- // FIXME: figure out what SPAdj is doing here.
- // FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (requiresRegisterScavenging(MF))
- SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
- else
- SReg = PPC::R0;
+ if (requiresRegisterScavenging(MF)) {
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
+ } else
+ SReg = is64Bit ? PPC::X0 : PPC::R0;
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
.addImm(Offset >> 16);
- BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SReg, RegState::Kill)
.addImm(Offset);
@@ -624,7 +598,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
- MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 1cc7213417d1..b1e6a7218ee7 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,8 +37,12 @@ public:
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -54,6 +58,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 1acdf4eb853b..0e55313b135f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,10 +1,10 @@
-//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
-//
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
index a33e7e03370c..0b392f99b6d7 100644
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ b/lib/Target/PowerPC/PPCRelocations.h
@@ -1,4 +1,4 @@
-//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC32RELOCATIONS_H
-#define PPC32RELOCATIONS_H
+#ifndef PPCRELOCATIONS_H
+#define PPCRELOCATIONS_H
#include "llvm/CodeGen/MachineRelocation.h"
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 9664f1457171..8c0a8589052a 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -1,10 +1,10 @@
-//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
-//
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass;
def LdStDCBA : InstrItinClass;
def LdStDCBF : InstrItinClass;
def LdStDCBI : InstrItinClass;
-def LdStGeneral : InstrItinClass;
+def LdStLoad : InstrItinClass;
+def LdStStore : InstrItinClass;
def LdStDSS : InstrItinClass;
def LdStICBI : InstrItinClass;
def LdStUX : InstrItinClass;
@@ -103,9 +104,11 @@ def VecVSR : InstrItinClass;
// Processor instruction itineraries.
include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
+include "PPCScheduleA2.td"
//===----------------------------------------------------------------------===//
// Instruction to itinerary class map - When add new opcodes to the supported
@@ -149,8 +152,8 @@ include "PPCScheduleG5.td"
// dcbf LdStDCBF
// dcbi LdStDCBI
// dcbst LdStDCBF
-// dcbt LdStGeneral
-// dcbtst LdStGeneral
+// dcbt LdStLoad
+// dcbtst LdStLoad
// dcbz LdStDCBF
// divd IntDivD
// divdu IntDivD
@@ -159,9 +162,9 @@ include "PPCScheduleG5.td"
// dss LdStDSS
// dst LdStDSS
// dstst LdStDSS
-// eciwx LdStGeneral
-// ecowx LdStGeneral
-// eieio LdStGeneral
+// eciwx LdStLoad
+// ecowx LdStLoad
+// eieio LdStLoad
// eqv IntGeneral
// extsb IntGeneral
// extsh IntGeneral
@@ -201,10 +204,10 @@ include "PPCScheduleG5.td"
// fsubs FPGeneral
// icbi LdStICBI
// isync SprISYNC
-// lbz LdStGeneral
-// lbzu LdStGeneral
+// lbz LdStLoad
+// lbzu LdStLoad
// lbzux LdStUX
-// lbzx LdStGeneral
+// lbzx LdStLoad
// ld LdStLD
// ldarx LdStLDARX
// ldu LdStLD
@@ -222,11 +225,11 @@ include "PPCScheduleG5.td"
// lhau LdStLHA
// lhaux LdStLHA
// lhax LdStLHA
-// lhbrx LdStGeneral
-// lhz LdStGeneral
-// lhzu LdStGeneral
+// lhbrx LdStLoad
+// lhz LdStLoad
+// lhzu LdStLoad
// lhzux LdStUX
-// lhzx LdStGeneral
+// lhzx LdStLoad
// lmw LdStLMW
// lswi LdStLMW
// lswx LdStLMW
@@ -241,11 +244,11 @@ include "PPCScheduleG5.td"
// lwarx LdStLWARX
// lwaux LdStLHA
// lwax LdStLHA
-// lwbrx LdStGeneral
-// lwz LdStGeneral
-// lwzu LdStGeneral
+// lwbrx LdStLoad
+// lwz LdStLoad
+// lwzu LdStLoad
// lwzux LdStUX
-// lwzx LdStGeneral
+// lwzx LdStLoad
// mcrf BrMCR
// mcrfs FPGeneral
// mcrxr BrMCRX
@@ -306,10 +309,10 @@ include "PPCScheduleG5.td"
// srawi IntShift
// srd IntRotateD
// srw IntGeneral
-// stb LdStGeneral
-// stbu LdStGeneral
-// stbux LdStGeneral
-// stbx LdStGeneral
+// stb LdStStore
+// stbu LdStStore
+// stbux LdStStore
+// stbx LdStStore
// std LdStSTD
// stdcx. LdStSTDCX
// stdu LdStSTD
@@ -324,11 +327,11 @@ include "PPCScheduleG5.td"
// stfsu LdStUX
// stfsux LdStUX
// stfsx LdStUX
-// sth LdStGeneral
-// sthbrx LdStGeneral
-// sthu LdStGeneral
-// sthux LdStGeneral
-// sthx LdStGeneral
+// sth LdStStore
+// sthbrx LdStStore
+// sthu LdStStore
+// sthux LdStStore
+// sthx LdStStore
// stmw LdStLMW
// stswi LdStLMW
// stswx LdStLMW
@@ -337,12 +340,12 @@ include "PPCScheduleG5.td"
// stvewx LdStSTVEBX
// stvx LdStSTVEBX
// stvxl LdStSTVEBX
-// stw LdStGeneral
-// stwbrx LdStGeneral
+// stw LdStStore
+// stwbrx LdStStore
// stwcx. LdStSTWCX
-// stwu LdStGeneral
-// stwux LdStGeneral
-// stwx LdStGeneral
+// stwu LdStStore
+// stwux LdStStore
+// stwx LdStStore
// subf IntGeneral
// subfc IntGeneral
// subfe IntGeneral
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 000000000000..419faea30220
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,616 @@
+//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User's Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def IFTH1 : FuncUnit; // Fetch unit 1
+def IFTH2 : FuncUnit; // Fetch unit 2
+def PDCD1 : FuncUnit; // Decode unit 1
+def PDCD2 : FuncUnit; // Decode unit 2
+def DISS1 : FuncUnit; // Issue unit 1
+def DISS2 : FuncUnit; // Issue unit 2
+def LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def IWB : FuncUnit; // Write-back unit for the I pipeline
+def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def JWB : FuncUnit; // Write-back unit for the J pipeline
+def AGEN : FuncUnit; // Address generation for the L pipeline
+def CRD : FuncUnit; // D-cache access for the L pipeline
+def LWB : FuncUnit; // Write-back unit for the L pipeline
+def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS0.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
+ IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
+ FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
+ [GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<33, [IWB]>],
+ [40, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<3, [AGEN], 1>,
+ InstrStage<2, [CRD], 1>,
+ InstrStage<1, [LWB]>]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC], 0>,
+ InstrStage<1, [LRACC], 0>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [FEXE1], 0>,
+ InstrStage<1, [AGEN], 0>,
+ InstrStage<1, [JEXE1], 0>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [FEXE2], 0>,
+ InstrStage<1, [CRD], 0>,
+ InstrStage<1, [JEXE2], 0>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<6, [FEXE3], 0>,
+ InstrStage<6, [LWB], 0>,
+ InstrStage<6, [JWB], 0>,
+ InstrStage<6, [IWB]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [9, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<25, [FWB]>],
+ [35, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<13, [FWB]>],
+ [23, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
new file mode 100644
index 000000000000..857ba40ff622
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -0,0 +1,652 @@
+//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM (as updated in 2010).
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
+def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
+def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
+def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
+def IU4_0 : FuncUnit; // Instruction buffer slot 1
+def IU4_1 : FuncUnit; // Instruction buffer slot 2
+def IU4_2 : FuncUnit; // Instruction buffer slot 3
+def IU4_3 : FuncUnit; // Instruction buffer slot 4
+def IU4_4 : FuncUnit; // Instruction buffer slot 5
+def IU4_5 : FuncUnit; // Instruction buffer slot 6
+def IU4_6 : FuncUnit; // Instruction buffer slot 7
+def IU4_7 : FuncUnit; // Instruction buffer slot 8
+def IU5 : FuncUnit; // Dependency resolution
+def IU6 : FuncUnit; // Instruction issue
+def RF0  : FuncUnit; // Register file access
+def XRF1 : FuncUnit; // XU register file access
+def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
+def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
+def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
+def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
+def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
+def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
+def FRF1 : FuncUnit; // FU register file access
+def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
+def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
+def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
+def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
+def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
+def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
+
+def CR_Bypass : Bypass; // The bypass for condition regs.
+//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
+ IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
+ IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
+ FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
+ [53, 7, 7],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
+ InstrItinData<SprISYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [13, 7, 7],
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
+ InstrStage<71, [FEX1], 0>,
+ InstrStage<71, [FEX2], 0>,
+ InstrStage<71, [FEX3], 0>,
+ InstrStage<71, [FEX4], 0>,
+ InstrStage<71, [FEX5], 0>,
+ InstrStage<71, [FEX6]>],
+ [86, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
+ InstrStage<58, [FEX1], 0>,
+ InstrStage<58, [FEX2], 0>,
+ InstrStage<58, [FEX3], 0>,
+ InstrStage<58, [FEX4], 0>,
+ InstrStage<58, [FEX5], 0>,
+ InstrStage<58, [FEX6]>],
+ [73, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPSqrt , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
+ InstrStage<68, [FEX1], 0>,
+ InstrStage<68, [FEX2], 0>,
+ InstrStage<68, [FEX3], 0>,
+ InstrStage<68, [FEX4], 0>,
+ InstrStage<68, [FEX5], 0>,
+ InstrStage<68, [FEX6]>],
+ [86, 7], // FIXME: should be [86, 7] for double
+ // and [82, 7] for single. Likewise,
+ // the FEX? cycle count should be 68
+ // for double and 64 for single.
+ [NoBypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
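Each InstrItinData entry above binds an itinerary class to the pipeline stages it occupies, a list of operand read/write cycles, and the bypasses available for forwarding. The short C++ sketch below shows how those numbers are read back through LLVM's itinerary query interface; the Itins object and the itinerary class index are placeholders for what PPCSubtarget::getInstrItineraryData() and an instruction's MCInstrDesc would supply, and the header path is taken from this revision's tree layout.

  #include "llvm/MC/MCInstrItineraries.h"

  // Sketch only: report the latency of an instruction's defined result,
  // given its itinerary class index (e.g. the class built from FPDivD above).
  unsigned resultLatency(const llvm::InstrItineraryData &Itins,
                         unsigned ItinClassIdx) {
    if (Itins.isEmpty())
      return 1;                        // no itineraries: assume unit latency
    // Cycle at which operand 0 (the result) is written; this is the first
    // entry of the latency list, e.g. 86 for the A2's FPDivD class.
    int DefCycle = Itins.getOperandCycle(ItinClassIdx, 0);
    // Fallback: the sum of the cycle counts of every InstrStage in the entry.
    unsigned StageLatency = Itins.getStageLatency(ItinClassIdx);
    return DefCycle >= 0 ? unsigned(DefCycle) : StageLatency;
  }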
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index ad4da1fe224f..bc926f7bb2b6 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G3 (750) processor.
@@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 03c3b29cc101..f7ec1e01333e 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G4 (7400) processor.
@@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 00cac3c7cab2..37ebfc59880b 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
-//
+//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G4+ (7450) processor.
@@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 1671f22b30ad..d1e40cef9639 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G5 (970) processor.
@@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index cf194de42e8f..f405b4711a52 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===//
+//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -74,6 +75,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
, TargetTriple(TT) {
@@ -139,3 +141,23 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2)
+ Mode = TargetSubtargetInfo::ANTIDEP_ALL;
+ else
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+
+ CriticalPathRCs.clear();
+
+ if (isPPC64())
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default;
+}
+
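The enablePostRAScheduler hook added here is what the generic post-register-allocation scheduler queries to decide whether and how to run. The sketch below is an invented caller, not the actual pass; the types are LLVM's, and the behaviour described in the comments is the PPC policy from the change above.

  #include "PPCSubtarget.h"                       // from lib/Target/PowerPC
  #include "llvm/Target/TargetSubtargetInfo.h"

  // Illustrative caller only. CriticalPathRCs is the inherited
  // TargetSubtargetInfo::RegClassVector (a SmallVectorImpl of register-class
  // pointers) provided by the real pass.
  bool shouldRunPostRA(const llvm::PPCSubtarget &ST,
                       llvm::CodeGenOpt::Level OptLevel,
                       llvm::TargetSubtargetInfo::RegClassVector &CriticalPathRCs) {
    llvm::TargetSubtargetInfo::AntiDepBreakMode Mode =
        llvm::TargetSubtargetInfo::ANTIDEP_NONE;
    if (!ST.enablePostRAScheduler(OptLevel, Mode, CriticalPathRCs))
      return false;                    // PPC opts out below CodeGenOpt::Default
    // Mode is ANTIDEP_ALL for the in-order 440/A2 directives and
    // ANTIDEP_CRITICAL otherwise; CriticalPathRCs now holds G8RC on ppc64
    // or GPRC on ppc32.
    return true;
  }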
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e028de6b09de..a275029d3e5d 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,12 +33,14 @@ namespace PPC {
enum {
DIR_NONE,
DIR_32,
+ DIR_440,
DIR_601,
DIR_602,
DIR_603,
DIR_7400,
DIR_750,
DIR_970,
+ DIR_A2,
DIR_64
};
}
@@ -66,6 +68,7 @@ protected:
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -136,15 +139,22 @@ public:
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
bool isGigaProcessor() const { return IsGigaProcessor; }
+ bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
/// isDarwin - True if this is any darwin platform.
bool isDarwin() const { return TargetTriple.isMacOSX(); }
+ /// isBGP - True if this is a BG/P platform.
+ bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index f5744b830489..d11397669912 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "PPC.h"
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -22,37 +23,46 @@ using namespace llvm;
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
- RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
}
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
+
+ // The binutils for the BG/P are too old for CFI.
+ if (Subtarget.isBGP())
+ setMCUseCFI(false);
}
-/// Override this for PowerPC. Tail merging happily breaks up instruction issue
-/// groups, which typically degrades performance.
-bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+void PPC32TargetMachine::anchor() { }
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
+void PPC64TargetMachine::anchor() { }
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
@@ -60,33 +70,56 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// PPC Code Generator Pass Configuration Options.
+class PPCPassConfig : public TargetPassConfig {
+public:
+ PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PPCTargetMachine &getPPCTargetMachine() const {
+ return getTM<PPCTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
+ TargetPassConfig *PassConfig = new PPCPassConfig(this, PM);
+
+ // Override this for PowerPC. Tail merging happily breaks up instruction issue
+ // groups, which typically degrades performance.
+ PassConfig->setEnableTailMerge(false);
+
+ return PassConfig;
+}
+
+bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createPPCISelDag(*this));
+ PM.add(createPPCISelDag(getPPCTargetMachine()));
return false;
}
-bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool PPCPassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
PM.add(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// FIXME: This should be moved to TargetJITInfo!!
if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
- DisableJumpTables = true;
-
+ Options.DisableJumpTables = true;
+
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
-
+
// Machine code emitter pass for PowerPC.
PM.add(createPPCJITCodeEmitterPass(*this, JCE));
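With this change the pass-pipeline hooks move off PPCTargetMachine onto a TargetPassConfig subclass, the pattern every target follows after this refactoring. A minimal hypothetical target is sketched below; every name containing "Foo" is invented, and only the TargetPassConfig calls mirror the real API used above.

  namespace {
  class FooPassConfig : public llvm::TargetPassConfig {
  public:
    FooPassConfig(FooTargetMachine *TM, llvm::PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}
    virtual bool addInstSelector() {
      // Hypothetical DAG instruction selector for the Foo target.
      PM.add(createFooISelDag(getTM<FooTargetMachine>()));
      return false;
    }
  };
  } // namespace

  llvm::TargetPassConfig *
  FooTargetMachine::createPassConfig(llvm::PassManagerBase &PM) {
    llvm::TargetPassConfig *PC = new FooPassConfig(this, PM);
    PC->setEnableTailMerge(false);  // per-target default, as PPC sets above
    return PC;
  }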
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d06f0843bd6d..7da2b0cb10c1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,8 +24,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
@@ -41,15 +39,16 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
- virtual const PPCTargetLowering *getTargetLowering() const {
+ virtual const PPCTargetLowering *getTargetLowering() const {
return &TLInfo;
}
virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
@@ -58,37 +57,39 @@ public:
virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
-
+
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData *getInstrItineraryData() const {
+ virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
- virtual bool getEnableTailMergeDefault() const;
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
///
class PPC32TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
public:
PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
///
class PPC64TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
public:
PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index f63111f465c3..fdb8a62b9d24 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..f77d85b15ab9
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCInfo
+parent = PowerPC
+required_libraries = MC Support Target
+add_to_library_groups = PowerPC
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 1f69ffb09c0a..093255e6af2d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,22 +2,6 @@ Target Independent Opportunities:
//===---------------------------------------------------------------------===//
-With the recent changes to make the implicit def/use set explicit in
-machineinstrs, we should change the target descriptions for 'call' instructions
-so that the .td files don't list all the call-clobbered registers as implicit
-defs. Instead, these should be added by the code generator (e.g. on the dag).
-
-This has a number of uses:
-
-1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
- for their different impdef sets.
-2. Targets with multiple calling convs (e.g. x86) which have different clobber
- sets don't need copies of call instructions.
-3. 'Interprocedural register allocation' can be done to reduce the clobber sets
- of calls.
-
-//===---------------------------------------------------------------------===//
-
We should recognize various "overflow detection" idioms and translate them into

llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom:
@@ -961,6 +945,25 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
+int g(int x) { return (x - 10) < 0; }
+Should combine to "x <= 9" (the sub has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int g(int x) { return (x + 10) < 0; }
+Should combine to "x < -10" (the add has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int f(int i, int j) { return i < j + 1; }
+int g(int i, int j) { return j > i - 1; }
+Should combine to "i <= j" (the add/sub has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
@@ -2358,3 +2361,8 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
should fold to x > y.
//===---------------------------------------------------------------------===//
+
+int f(double x) { return __builtin_fabs(x) < 0.0; }
+should fold to false.
+
+//===---------------------------------------------------------------------===//
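The three nsw entries added above are ordinary integer identities once wrapping is excluded, which a brute-force check over a non-overflowing range confirms. This standalone snippet is only a sanity check of the intended folds, not part of the tree:

  #include <cassert>

  int main() {
    for (int x = -1000; x <= 1000; ++x) {
      assert(((x - 10) < 0) == (x <= 9));     // (x - 10) < 0  ==>  x <= 9
      assert(((x + 10) < 0) == (x < -10));    // (x + 10) < 0  ==>  x < -10
    }
    for (int i = -100; i <= 100; ++i)
      for (int j = -100; j <= 100; ++j)
        assert((i < j + 1) == (j > i - 1) && (i < j + 1) == (i <= j));
    return 0;
  }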
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 5b87849b9d17..ae4af0f44250 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS Sparc.td)
-llvm_tablegen(SparcGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SparcGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SparcGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(SparcGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM SparcGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(SparcCommonTableGen)
add_llvm_target(SparcCodeGen
@@ -16,23 +16,12 @@ add_llvm_target(SparcCodeGen
SparcISelDAGToDAG.cpp
SparcISelLowering.cpp
SparcFrameLowering.cpp
+ SparcMachineFunctionInfo.cpp
SparcRegisterInfo.cpp
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSparcDesc
- LLVMSparcInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index dab35e5e4e6f..883aa3a497c4 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
@@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
}
//Call's delay filler can def some of call's uses.
- if (slot->getDesc().isCall())
+ if (slot->isCall())
insertCallUses(slot, RegUses);
else
insertDefsUses(slot, RegDefs, RegUses);
@@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
|| I->isLabel()
- || I->getDesc().hasDelaySlot()
+ || I->hasDelaySlot()
|| isDelayFiller(MBB, I))
break;
@@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- if (candidate->getDesc().mayLoad()) {
+ if (candidate->mayLoad()) {
sawLoad = true;
if (sawStore)
return true;
}
- if (candidate->getDesc().mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -282,7 +282,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
if (RegSet.count(Reg))
return true;
// check Aliased Registers
- for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
*Alias; ++ Alias)
if (RegSet.count(*Alias))
return true;
@@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const MCInstrDesc &prevdesc = (--candidate)->getDesc();
- return prevdesc.hasDelaySlot();
+ --candidate;
+ return candidate->hasDelaySlot();
}
bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
{
- if (!I->getDesc().isCall())
+ if (!I->isCall())
return false;
unsigned structSizeOpNum = 0;
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 1423b1e64d66..9a729bd87044 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
/// registers that correspond to it.
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
unsigned &OddReg) {
- static const unsigned EvenHalvesOfPairs[] = {
+ static const uint16_t EvenHalvesOfPairs[] = {
SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
};
- static const unsigned OddHalvesOfPairs[] = {
+ static const uint16_t OddHalvesOfPairs[] = {
SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
};
- static const unsigned DoubleRegsInOrder[] = {
+ static const uint16_t DoubleRegsInOrder[] = {
SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
};
- for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
if (DoubleRegsInOrder[i] == DoubleReg) {
EvenReg = EvenHalvesOfPairs[i];
OddReg = OddHalvesOfPairs[i];
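The sizeof-based element count is replaced with llvm::array_lengthof from llvm/ADT/STLExtras.h, which deduces the length from the array's type and cannot fall out of sync with the element type. A tiny standalone illustration follows; the table contents are made up, not SPARC registers:

  #include "llvm/ADT/STLExtras.h"
  #include <stdint.h>

  static const uint16_t ExampleRegs[] = { 11, 22, 33, 44 };

  unsigned exampleRegCount() {
    // The count is deduced from the array type; no sizeof(array)/sizeof(elem).
    return llvm::array_lengthof(ExampleRegs);   // == 4
  }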
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
new file mode 100644
index 000000000000..fe20d2f4bd15
--- /dev/null
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Sparc/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Sparc
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = SparcCodeGen
+parent = Sparc
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
+add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index d3bdf0b503ae..9d4db4d25ef7 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc
SparcMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcDesc
- LLVMMC
- LLVMSparcInfo
- LLVMSupport
- )
-
add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..97f8f162c27f
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SparcDesc
+parent = Sparc
+required_libraries = MC SparcInfo Support
+add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 6a7e0902354e..f5e10fc3a465 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+void SparcELFMCAsmInfo::anchor() { }
+
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 0cb6827d2771..f0e1354c212b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,15 @@
#ifndef SPARCTARGETASMINFO_H
#define SPARCTARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
- struct SparcELFMCAsmInfo : public MCAsmInfo {
+ class SparcELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index cb2a7dfe6160..7fdb0c39285a 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -50,9 +51,10 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 2fd9e3f4cbd3..cba775adb1a8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -15,9 +15,7 @@
#define SPARCMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheSparcTarget;
extern Target TheSparcV9Target;
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 7b2c6141dbf8..ce6ae17b6ca2 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -18,7 +18,6 @@
#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
class FunctionPass;
@@ -74,7 +73,6 @@ namespace llvm {
inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
case SPCC::ICC_NE: return "ne";
case SPCC::ICC_E: return "e";
case SPCC::ICC_G: return "g";
@@ -103,7 +101,8 @@ namespace llvm {
case SPCC::FCC_LE: return "le";
case SPCC::FCC_ULE: return "ule";
case SPCC::FCC_O: return "o";
- }
+ }
+ llvm_unreachable("Invalid cond code");
}
} // end namespace llvm
#endif
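Moving llvm_unreachable out of the default case and after the switch keeps the switch fully covered, so the compiler's switch-coverage warnings can flag any SPCC enumerator that is added later without a case, while the trailing unreachable still documents that control cannot fall out of the function. A small self-contained sketch of the same shape (std::abort stands in for llvm_unreachable):

#include <cstdio>
#include <cstdlib>

enum CondCode { CC_EQ, CC_NE, CC_GT };

static const char *condCodeToString(CondCode CC) {
  switch (CC) {               // no default: every enumerator is listed,
  case CC_EQ: return "eq";    // so adding a new CondCode without a case
  case CC_NE: return "ne";    // here produces a compiler warning
  case CC_GT: return "gt";
  }
  std::fprintf(stderr, "invalid condition code\n");
  std::abort();               // stands in for llvm_unreachable()
}

int main() { std::puts(condCodeToString(CC_NE)); return 0; }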
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 764336665d0b..611f8e8129f4 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,10 +1,10 @@
-//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
-//
+//===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 345e1bca54c6..c14b3d4a0065 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -63,6 +62,8 @@ namespace {
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
const;
+
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -82,7 +83,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+ O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
break;
case MachineOperand::MO_Immediate:
@@ -141,13 +142,13 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
std::string operand = "";
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
- default: assert(0 && "Operand is not a register ");
+ default: llvm_unreachable("Operand is not a register");
case MachineOperand::MO_Register:
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
"Operand is not a physical register ");
assert(MO.getReg() != SP::O7 &&
"%o7 is assigned as destination for getpcx!");
- operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ operand = "%" + StringRef(getRegisterName(MO.getReg())).lower();
break;
}
@@ -237,12 +238,19 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
-
+MachineLocation SparcAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ assert(MI->getNumOperands() == 4 && "Invalid number of operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index 856f87ad1d37..d4712208126f 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -1,10 +1,10 @@
-//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
-//
+//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the Sparc architectures.
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 320c8ca26d7e..1c5c89e97158 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -1,4 +1,4 @@
-//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 9a2ddc83f5aa..210705e2d47a 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -1,4 +1,4 @@
-//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 8c6103dd8a39..93710c4e0b0f 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -176,7 +176,6 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
MulLHS, MulRHS);
// The high part is in the Y register.
return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
- return NULL;
}
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index d70b16375e95..c3e6f1606794 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -51,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
//Try to get first reg
@@ -175,7 +174,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
InVals.push_back(Arg);
continue;
}
@@ -197,7 +196,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
&SP::IntRegsRegClass);
@@ -237,7 +236,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
- false,false, 0);
+ false,false, false, 0);
InVals.push_back(Load);
continue;
}
@@ -248,7 +247,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
Offset+4,
true);
@@ -256,7 +255,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
@@ -273,7 +272,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
// Sparc is big endian, so add an offset based on the ObjectVT.
@@ -302,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
- const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
@@ -348,7 +347,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue
SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -467,13 +466,13 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
false, false, 0);
// Sparc is big-endian, so the high part comes first.
SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getIntPtrConstant(4));
// Load the low part.
SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
@@ -763,7 +762,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -831,22 +832,19 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
/// be zero. Op is expected to be a target specific node. Used by DAG
/// combiner.
void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
APInt KnownZero2, KnownOne2;
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
case SPISD::SELECT_FCC:
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
- Depth+1);
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
- Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -897,7 +895,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, 0);
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -918,7 +916,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, 0);
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -1026,7 +1024,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
DebugLoc dl = Node->getDebugLoc();
SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
- MachinePointerInfo(SV), false, false, 0);
+ MachinePointerInfo(SV), false, false, false, 0);
// Increment the pointer, VAList, to the next vaarg
SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
DAG.getConstant(VT.getSizeInBits()/8,
@@ -1038,11 +1036,11 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
// f64 load.
if (VT != MVT::f64)
return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Otherwise, load it as i64, then do a bitconvert.
SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Bit-Convert the value to f64.
SDValue Ops[2] = {
@@ -1103,7 +1101,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
FrameAddr = DAG.getLoad(MVT::i32, dl,
Chain,
Ptr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
}
return FrameAddr;
@@ -1135,7 +1133,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
RetAddr = DAG.getLoad(MVT::i32, dl,
Chain,
Ptr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
}
return RetAddr;
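With the Mask parameter gone, computeMaskedBitsForTargetNode derives the bit width from KnownZero itself; for the SELECT_ICC/SELECT_FCC cases the usual rule is that a bit stays known only if both possible operands agree on it, i.e. the two KnownZero/KnownOne pairs are intersected. A toy, LLVM-free sketch of that merge, using plain 64-bit masks instead of APInt:

#include <cassert>
#include <cstdint>
#include <cstdio>

// A bit set in Zero is provably 0, a bit set in One is provably 1;
// the two masks never overlap.
struct KnownBits {
  uint64_t Zero, One;
};

// A select may produce either operand, so only bits that are known,
// with the same value, in both operands stay known in the result.
static KnownBits mergeSelect(KnownBits A, KnownBits B) {
  assert((A.Zero & A.One) == 0 && (B.Zero & B.One) == 0);
  return KnownBits{A.Zero & B.Zero, A.One & B.One};
}

int main() {
  KnownBits A = {~0xFFull, 0x0C};   // bits 8+ known zero, bits 2-3 known one
  KnownBits B = {~0xFFull, 0x04};   // bits 8+ known zero, bit 2 known one
  KnownBits R = mergeSelect(A, B);
  std::printf("zero=%#llx one=%#llx\n",
              (unsigned long long)R.Zero, (unsigned long long)R.One);
  return 0;
}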
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 8a1886a856e0..cf430485cfec 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -15,8 +15,8 @@
#ifndef SPARC_ISELLOWERING_H
#define SPARC_ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
#include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace SPISD {
@@ -50,7 +50,6 @@ namespace llvm {
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -77,9 +76,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 6535259e16ff..dce331228b8f 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -1,10 +1,10 @@
-//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
-//
+//===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7a6bf50fa7d4..faff468a587d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -79,7 +79,6 @@ static bool IsIntegerCC(unsigned CC)
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
{
switch(CC) {
- default: llvm_unreachable("Unknown condition code");
case SPCC::ICC_NE: return SPCC::ICC_E;
case SPCC::ICC_E: return SPCC::ICC_NE;
case SPCC::ICC_G: return SPCC::ICC_LE;
@@ -110,6 +109,18 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_NE: return SPCC::FCC_E;
case SPCC::FCC_E: return SPCC::FCC_NE;
}
+ llvm_unreachable("Invalid cond code");
+}
+
+MachineInstr *
+SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const {
+ MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
}
@@ -133,7 +144,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
break;
//Terminator is not a branch
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
//Handle Unconditional branches
@@ -195,7 +206,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
+
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
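The new emitFrameIndexDebugValue hook builds the target's DBG_VALUE by chaining operand appenders on a builder object. A toy sketch of that fluent-builder shape, with strings in place of real MachineOperands (addMetadataRef is an invented stand-in for addMetadata):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy instruction builder mimicking the chained
// BuildMI(...).addFrameIndex(FI).addImm(0).addImm(Offset) style above;
// the real MachineInstrBuilder appends MachineOperands, not strings.
struct Operand { const char *Kind; int64_t Value; };

class InstBuilder {
  std::vector<Operand> Ops;
public:
  InstBuilder &addFrameIndex(int FI)  { Ops.push_back({"fi",  FI});  return *this; }
  InstBuilder &addImm(int64_t Imm)    { Ops.push_back({"imm", Imm}); return *this; }
  InstBuilder &addMetadataRef(int Id) { Ops.push_back({"md",  Id});  return *this; }
  void dump() const {
    for (size_t i = 0; i != Ops.size(); ++i)
      std::printf("%s:%lld ", Ops[i].Kind, (long long)Ops[i].Value);
    std::printf("\n");
  }
};

int main() {
  InstBuilder B;
  B.addFrameIndex(2).addImm(0).addImm(16).addMetadataRef(7);  // DBG_VALUE-like shape
  B.dump();
  return 0;
}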
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index eda64efb7a03..204f69855c23 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -1,4 +1,4 @@
-//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef SPARCINSTRUCTIONINFO_H
#define SPARCINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SparcGenInstrInfo.inc"
@@ -62,6 +62,13 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
+ /// emitFrameIndexDebugValue - Emit a target-dependent form of
+ /// DBG_VALUE encoding the address of a frame index.
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index cf5c48fd18d9..15541ef2f837 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -1,10 +1,10 @@
-//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
-//
+//===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the Sparc instructions in TableGen format.
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..e7442826e78b
--- /dev/null
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void SparcMachineFunctionInfo::anchor() { }
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 0b74308eb0ee..90c27a4459a1 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -18,6 +18,7 @@
namespace llvm {
class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
private:
unsigned GlobalBaseReg;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 8c1625148c8c..63574681b085 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcRegisterInfo.h"
+#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -33,9 +33,9 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
: SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
}
-const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const unsigned CalleeSavedRegs[] = { 0 };
+ static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
@@ -118,10 +118,8 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned SparcRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned SparcRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
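The switch from unsigned to uint16_t for the callee-saved register table (and the matching getCalleeSavedRegs return type in the header below) follows the broader move in this import toward 16-bit register numbers in static tables; presumably the point is simply that register numbers fit in 16 bits, so the read-only tables shrink. A trivial standalone illustration of the size difference:

#include <cstdint>
#include <cstdio>

// Same seven entries, two element types: on a typical ILP32/LP64 target
// the uint16_t table is half the size of the unsigned one.
static const uint16_t Regs16[] = { 24, 25, 26, 27, 28, 29, 0 };
static const unsigned Regs32[] = { 24, 25, 26, 27, 28, 29, 0 };

int main() {
  std::printf("uint16_t table: %zu bytes\n", sizeof(Regs16));
  std::printf("unsigned table: %zu bytes\n", sizeof(Regs32));
  return 0;
}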
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index f845667b4d9c..9515ad33dcc2 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,7 +32,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index cf928293c169..81bff6c51c9d 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -1,10 +1,10 @@
-//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
-//
+//===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -39,6 +39,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
let Num = num;
let SubRegs = subregs;
let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
}
// Control Registers
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 6c501cff6a3a..e5b2aeb1bb85 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void SparcSubtarget::anchor() { }
+
SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit) :
SparcGenSubtargetInfo(TT, CPU, FS),
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 00a04c3bea57..a81931b34aa2 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@ namespace llvm {
class StringRef;
class SparcSubtarget : public SparcGenSubtargetInfo {
+ virtual void anchor();
bool IsV9;
bool V8DeprecatedInsts;
bool IsVIS;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 3d7b4a47d1a8..6f313562c101 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,9 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcTargetMachine.h"
+#include "Sparc.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -24,43 +25,73 @@ extern "C" void LLVMInitializeSparcTarget() {
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameLowering(Subtarget) {
}
-bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createSparcISelDag(*this));
+namespace {
+/// Sparc Code Generator Pass Configuration Options.
+class SparcPassConfig : public TargetPassConfig {
+public:
+ SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SparcTargetMachine &getSparcTargetMachine() const {
+ return getTM<SparcTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SparcPassConfig(this, PM);
+}
+
+bool SparcPassConfig::addInstSelector() {
+ PM.add(createSparcISelDag(getSparcTargetMachine()));
return false;
}
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
-bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel){
- PM.add(createSparcFPMoverPass(*this));
- PM.add(createSparcDelaySlotFillerPass(*this));
+bool SparcPassConfig::addPreEmitPass(){
+ PM.add(createSparcFPMoverPass(getSparcTargetMachine()));
+ PM.add(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
return true;
}
+void SparcV8TargetMachine::anchor() { }
+
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
-SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+void SparcV9TargetMachine::anchor() { }
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
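The new SparcPassConfig class is the Sparc side of the TargetPassConfig refactoring: rather than the target machine overriding addInstSelector/addPreEmitPass directly, it returns a configuration object and the generic code-generation driver invokes the hooks on it. A rough, self-contained sketch of that shape (toy types and pass names, not LLVM's real PassManagerBase or TargetPassConfig):

#include <cstdio>
#include <string>
#include <vector>

// Stand-in for PassManagerBase: just records pass names in order.
struct PassManager {
  std::vector<std::string> Passes;
  void add(const std::string &Name) { Passes.push_back(Name); }
};

// Stand-in for TargetPassConfig: generic driver code calls the hooks,
// targets override only the ones they care about.
class PassConfig {
protected:
  PassManager &PM;
public:
  explicit PassConfig(PassManager &P) : PM(P) {}
  virtual ~PassConfig() {}
  virtual bool addInstSelector() { return false; }
  virtual bool addPreEmitPass() { return false; }
};

class SparcLikePassConfig : public PassConfig {
public:
  explicit SparcLikePassConfig(PassManager &P) : PassConfig(P) {}
  virtual bool addInstSelector() { PM.add("sparc-isel-dag"); return false; }
  virtual bool addPreEmitPass() {
    PM.add("sparc-fp-mover");
    PM.add("sparc-delay-slot-filler");
    return true;
  }
};

int main() {
  PassManager PM;
  SparcLikePassConfig Config(PM);  // what createPassConfig() would hand back
  Config.addInstSelector();        // the generic driver calls the hooks in order
  Config.addPreEmitPass();
  for (size_t i = 0; i != PM.Passes.size(); ++i)
    std::printf("%s\n", PM.Passes[i].c_str());
  return 0;
}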
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 3c907dd44de1..b203dfa48921 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,8 +34,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64bit);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -54,26 +55,31 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
///
class SparcV8TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
///
class SparcV9TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index a0760231386a..b0d031e0c2be 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..b5c320f92553
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Sparc/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SparcInfo
+parent = Sparc
+required_libraries = MC Support Target
+add_to_library_groups = Sparc
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
deleted file mode 100644
index 7c09c0ea7b5a..000000000000
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS SystemZ.td)
-
-llvm_tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SystemZGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(SystemZCommonTableGen)
-
-add_llvm_target(SystemZCodeGen
- SystemZAsmPrinter.cpp
- SystemZISelDAGToDAG.cpp
- SystemZISelLowering.cpp
- SystemZInstrInfo.cpp
- SystemZFrameLowering.cpp
- SystemZRegisterInfo.cpp
- SystemZSubtarget.cpp
- SystemZTargetMachine.cpp
- SystemZSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMSystemZDesc
- LLVMSystemZInfo
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 822df097a37d..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-add_llvm_library(LLVMSystemZDesc
- SystemZMCTargetDesc.cpp
- SystemZMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZDesc
- LLVMMC
- LLVMSystemZInfo
- )
-
-add_dependencies(LLVMSystemZDesc SystemZCommonTableGen)
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
deleted file mode 100644
index 8540546b62d3..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the SystemZMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-using namespace llvm;
-
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
- IsLittleEndian = false;
- PointerSize = 8;
- PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
- PCSymbol = ".";
-}
-
-const MCSection *SystemZMCAsmInfo::
-getNonexecutableStackSection(MCContext &Ctx) const{
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
-}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
deleted file mode 100644
index a6a27e2f4b6d..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SystemZMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SystemZTARGETASMINFO_H
-#define SystemZTARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
- class StringRef;
-
- struct SystemZMCAsmInfo : public MCAsmInfo {
- explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
- virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
- };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
deleted file mode 100644
index 23fb1e068e70..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides SystemZ specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZMCTargetDesc.h"
-#include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "SystemZGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "SystemZGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "SystemZGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createSystemZMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitSystemZMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitSystemZMCRegisterInfo(X, 0);
- return X;
-}
-
-static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default)
- RM = Reloc::Static;
- X->InitMCCodeGenInfo(RM, CM);
- return X;
-}
-
-extern "C" void LLVMInitializeSystemZTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
- createSystemZMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
- createSystemZMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
- createSystemZMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
- createSystemZMCSubtargetInfo);
-}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
deleted file mode 100644
index e2ad5afd6e57..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides SystemZ specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZMCTARGETDESC_H
-#define SYSTEMZMCTARGETDESC_H
-
-namespace llvm {
-class MCSubtargetInfo;
-class Target;
-class StringRef;
-
-extern Target TheSystemZTarget;
-
-} // End llvm namespace
-
-// Defines symbolic names for SystemZ registers.
-// This defines a mapping from register name to register number.
-#define GET_REGINFO_ENUM
-#include "SystemZGenRegisterInfo.inc"
-
-// Defines symbolic names for the SystemZ instructions.
-#define GET_INSTRINFO_ENUM
-#include "SystemZGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "SystemZGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
deleted file mode 100644
index 6356491debeb..000000000000
--- a/lib/Target/SystemZ/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMSystemZCodeGen
-TARGET = SystemZ
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \
- SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \
- SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
deleted file mode 100644
index 88960b9cc601..000000000000
--- a/lib/Target/SystemZ/SystemZ.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM SystemZ backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SystemZ_H
-#define LLVM_TARGET_SystemZ_H
-
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SystemZTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- namespace SystemZCC {
- // SystemZ specific condition code. These correspond to SYSTEMZ_*_COND in
- // SystemZInstrInfo.td. They must be kept in synch.
- enum CondCodes {
- O = 0,
- H = 1,
- NLE = 2,
- L = 3,
- NHE = 4,
- LH = 5,
- NE = 6,
- E = 7,
- NLH = 8,
- HE = 9,
- NL = 10,
- LE = 11,
- NH = 12,
- NO = 13,
- INVALID = -1
- };
- }
-
- FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
-} // end namespace llvm;
-#endif
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
deleted file mode 100644
index 4c08c087225e..000000000000
--- a/lib/Target/SystemZ/SystemZ.td
+++ /dev/null
@@ -1,61 +0,0 @@
-//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is the top level entry point for the SystemZ target.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features.
-//===----------------------------------------------------------------------===//
-def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
- "Support Z10 instructions">;
-
-//===----------------------------------------------------------------------===//
-// SystemZ supported processors.
-//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"z9", []>;
-def : Proc<"z10", [FeatureZ10]>;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "SystemZRegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Description
-//===----------------------------------------------------------------------===//
-
-include "SystemZCallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "SystemZInstrInfo.td"
-include "SystemZInstrFP.td"
-
-def SystemZInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// Target Declaration
-//===----------------------------------------------------------------------===//
-
-def SystemZ : Target {
- let InstructionSet = SystemZInstrInfo;
-}
-
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
deleted file mode 100644
index 43dcdfc3936b..000000000000
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the SystemZ assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class SystemZAsmPrinter : public AsmPrinter {
- public:
- SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "SystemZ Assembly Printer";
- }
-
- void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
- void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (int16_t)MI->getOperand(OpNum).getImm();
- }
- void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (uint16_t)MI->getOperand(OpNum).getImm();
- }
- void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (int32_t)MI->getOperand(OpNum).getImm();
- }
- void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (uint32_t)MI->getOperand(OpNum).getImm();
- }
-
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void EmitInstruction(const MachineInstr *MI);
- };
-} // end of anonymous namespace
-
-#include "SystemZGenAsmWriter.inc"
-
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
-}
-
-void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- return;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- O << *Mang->getSymbol(GV);
-
- // Assemble calls via PLT for externally visible symbols if PIC.
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
- !GV->hasLocalLinkage())
- O << "@PLT";
-
- printOffset(MO.getOffset(), O);
- return;
- }
- case MachineOperand::MO_ExternalSymbol: {
- std::string Name(MAI->getGlobalPrefix());
- Name += MO.getSymbolName();
- O << Name;
-
- if (TM.getRelocationModel() == Reloc::PIC_)
- O << "@PLT";
-
- return;
- }
- default:
- assert(0 && "Not implemented yet!");
- }
-}
-
-
-void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O, const char *Modifier) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Register: {
- assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Virtual registers should be already mapped!");
- unsigned Reg = MO.getReg();
- if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
- if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
- else if (strncmp(Modifier + 7, "odd", 3) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
- else
- assert(0 && "Invalid subreg modifier");
- }
-
- O << '%' << getRegisterName(Reg);
- return;
- }
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- return;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
- << MO.getIndex();
-
- return;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
- << MO.getIndex();
-
- printOffset(MO.getOffset(), O);
- break;
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- break;
- case MachineOperand::MO_ExternalSymbol: {
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- break;
- }
- default:
- assert(0 && "Not implemented yet!");
- }
-
- switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
- case SystemZII::MO_NO_FLAG:
- break;
- case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
- case SystemZII::MO_PLT: O << "@PLT"; break;
- }
-
- printOffset(MO.getOffset(), O);
-}
-
-void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &Base = MI->getOperand(OpNum);
-
- // Print displacement operand.
- printOperand(MI, OpNum+1, O);
-
- // Print base operand (if any)
- if (Base.getReg()) {
- O << '(';
- printOperand(MI, OpNum, O);
- O << ')';
- }
-}
-
-void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &Base = MI->getOperand(OpNum);
- const MachineOperand &Index = MI->getOperand(OpNum+2);
-
- // Print displacement operand.
- printOperand(MI, OpNum+1, O);
-
- // Print base operand (if any)
- if (Base.getReg()) {
- O << '(';
- printOperand(MI, OpNum, O);
- if (Index.getReg()) {
- O << ',';
- printOperand(MI, OpNum+2, O);
- }
- O << ')';
- } else
- assert(!Index.getReg() && "Should allocate base register first!");
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmPrinter() {
- RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
-}
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
deleted file mode 100644
index c799a9e501aa..000000000000
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ /dev/null
@@ -1,46 +0,0 @@
-//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for SystemZ architecture.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SystemZ Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-def RetCC_SystemZ : CallingConv<[
- // Promote i8/i16/i32 arguments to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // i64 is returned in register R2
- CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
-
- // f32 / f64 are returned in F0
- CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
-]>;
-
-//===----------------------------------------------------------------------===//
-// SystemZ Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CC_SystemZ : CallingConv<[
- // Promote i8/i16/i32 arguments to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // The first 5 integer arguments of non-varargs functions are passed in
- // integer registers.
- CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
-
- // The first 4 floating point arguments of non-varargs functions are passed
- // in FP registers.
- CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
-
- // Integer values get stored in stack slots that are 8 bytes in
- // size and 8-byte aligned.
- CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
-]>;
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
deleted file mode 100644
index 2ad84a2d052e..000000000000
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ /dev/null
@@ -1,386 +0,0 @@
-//=====- SystemZFrameLowering.cpp - SystemZ Frame Information ------*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZFrameLowering.h"
-#include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
- // Fill the spill offsets map
- static const unsigned SpillOffsTab[][2] = {
- { SystemZ::R2D, 0x10 },
- { SystemZ::R3D, 0x18 },
- { SystemZ::R4D, 0x20 },
- { SystemZ::R5D, 0x28 },
- { SystemZ::R6D, 0x30 },
- { SystemZ::R7D, 0x38 },
- { SystemZ::R8D, 0x40 },
- { SystemZ::R9D, 0x48 },
- { SystemZ::R10D, 0x50 },
- { SystemZ::R11D, 0x58 },
- { SystemZ::R12D, 0x60 },
- { SystemZ::R13D, 0x68 },
- { SystemZ::R14D, 0x70 },
- { SystemZ::R15D, 0x78 }
- };
-
- RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-
- for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
- RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
-}
-
-/// hasFP - Return true if the specified function should have a dedicated
-/// frame pointer register. This is true if the function has variable sized
-/// allocas or if frame pointer elimination is disabled.
-bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- int64_t NumBytes, const TargetInstrInfo &TII) {
- unsigned Opc; uint64_t Chunk;
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
-
- if (Offset >= (1LL << 15) - 1) {
- Opc = SystemZ::ADD64ri32;
- Chunk = (1LL << 31) - 1;
- } else {
- Opc = SystemZ::ADD64ri16;
- Chunk = (1LL << 15) - 1;
- }
-
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
- .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
- // The PSW implicit def is dead.
- MI->getOperand(3).setIsDead();
- Offset -= ThisVal;
- }
-}
-
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const SystemZInstrInfo &TII =
- *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the number of bytes to allocate from the FrameInfo.
- // Note that the area for callee-saved registers is already allocated, so we
- // need to 'undo' that part of the stack movement.
- uint64_t StackSize = MFI->getStackSize();
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
-
- // Skip the callee-saved push instructions.
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == SystemZ::MOV64mr ||
- MBBI->getOpcode() == SystemZ::MOV64mrm))
- ++MBBI;
-
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- // adjust stack pointer: R15 -= numbytes
- if (StackSize || MFI->hasCalls()) {
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
- emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
- }
-
- if (hasFP(MF)) {
- // Update R11 with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
- .addReg(SystemZ::R15D);
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(SystemZ::R11D);
-
- }
-}
-
-void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const SystemZInstrInfo &TII =
- *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned RetOpcode = MBBI->getOpcode();
-
- switch (RetOpcode) {
- case SystemZ::RET: break; // These are ok
- default:
- assert(0 && "Can only insert epilog into returning blocks");
- }
-
- // Get the number of bytes to allocate from the FrameInfo
- // Note that the area for callee-saved registers is already allocated, so we
- // need to 'undo' that part of the stack movement.
- uint64_t StackSize =
- MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
- uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
-
- // Skip the final terminator instruction.
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- --MBBI;
- if (!PI->getDesc().isTerminator())
- break;
- }
-
- // When the callee-saved restores were emitted, the stack frame was not yet
- // finalized (and thus the stack size was unknown). Adjust the offset now
- // that the full stack size is known.
- if (StackSize || MFI->hasCalls()) {
- assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
- MBBI->getOpcode() == SystemZ::MOV64rm) &&
- "Expected to see callee-save register restore code");
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
-
- unsigned i = 0;
- MachineInstr &MI = *MBBI;
- while (!MI.getOperand(i).isImm()) {
- ++i;
- assert(i < MI.getNumOperands() && "Unexpected restore code!");
- }
-
- uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
- // If Offset does not fit into the signed 20-bit displacement field, we need
- // to emit some additional code...
- if (Offset > 524287) {
- // Fold the displacement into load instruction as much as possible.
- NumBytes = Offset - 524287;
- Offset = 524287;
- emitSPUpdate(MBB, MBBI, NumBytes, TII);
- }
-
- MI.getOperand(i).ChangeToImmediate(Offset);
- }
-}
-
-int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
- uint64_t StackSize = MFI->getStackSize();
-
- // Fixed objects are really located in the "previous" frame.
- if (FI < 0)
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- Offset += StackSize - getOffsetOfLocalArea();
-
- // Skip the register save area if we generated the stack frame.
- if (StackSize || MFI->hasCalls())
- Offset -= getOffsetOfLocalArea();
-
- return Offset;
-}
-
-bool
-SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- // Scan the callee-saved registers and find the bounds of the register spill area.
- unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (!SystemZ::FP64RegClass.contains(Reg)) {
- unsigned Offset = RegSpillOffsets[Reg];
- CalleeFrameSize += 8;
- if (StartOffset > Offset) {
- LowReg = Reg; StartOffset = Offset;
- }
- if (EndOffset < Offset) {
- HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
- }
- }
- }
-
- // Save information for epilogue inserter.
- MFI->setCalleeSavedFrameSize(CalleeFrameSize);
- MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
-
- // Save GPRs
- if (StartOffset) {
- // Build a store instruction. Use a STORE MULTIPLE instruction if there is
- // more than one register to store, otherwise a plain STORE.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
- SystemZ::MOV64mr : SystemZ::MOV64mrm)));
-
- // Add store operands.
- MIB.addReg(SystemZ::R15D).addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
- MIB.addReg(LowReg, RegState::Kill);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Kill);
-
- // Do a second scan, marking the remaining registers as killed by the instruction.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitKill);
- }
- }
-
- // Save FPRs
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg)) {
- MBB.addLiveIn(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, TRI);
- }
- }
-
- return true;
-}
-
-bool
-SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
-
- // Restore FP registers
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg))
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, TRI);
- }
-
- // Restore GP registers
- unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
- unsigned StartOffset = RegSpillOffsets[LowReg];
-
- if (StartOffset) {
- // Build a load instruction. Use a LOAD MULTIPLE instruction if there is
- // more than one register to load, otherwise a plain LOAD.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
- SystemZ::MOV64rm : SystemZ::MOV64rmm)));
- // Add load operands.
- MIB.addReg(LowReg, RegState::Define);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Define);
-
- MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
- MIB.addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
-
- // Do a second scan, marking the remaining registers as defined by the instruction.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
- }
-
- return true;
-}
-
-void
-SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // Determine whether R14/R15 will ever be clobbered inside the function and,
- // if so, mark them as callee-saved.
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Check whether the high FPRs are ever used; if so, we need to save R15 as
- // well.
- static const unsigned HighFPRs[] = {
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
- };
-
- bool HighFPRsUsed = false;
- for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
- HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
-
- if (FFI->hasCalls())
- /* FIXME: function is varargs */
- /* FIXME: function grabs RA */
- /* FIXME: function calls eh_return */
- MRI.setPhysRegUsed(SystemZ::R14D);
-
- if (HighFPRsUsed ||
- FFI->hasCalls() ||
- FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
- FFI->hasVarSizedObjects() // Function performs dynamic allocas
- /* FIXME: function is varargs */)
- MRI.setPhysRegUsed(SystemZ::R15D);
-}
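
A minimal sketch (not part of the patch) of the chunked stack-pointer adjustment performed by the deleted emitSPUpdate(): the total adjustment is split into pieces that fit the immediate field of the chosen add instruction, mirroring the ADD64ri16 / ADD64ri32 split above. The printed mnemonic is only a placeholder.

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    // Split a stack-pointer adjustment into immediates the add instruction can
    // encode: a 16-bit immediate form for small offsets, a 32-bit form otherwise.
    void emitSPUpdateSketch(int64_t NumBytes) {
      bool IsSub = NumBytes < 0;
      uint64_t Offset = IsSub ? -NumBytes : NumBytes;
      uint64_t Chunk = (Offset >= (1ULL << 15) - 1) ? (1ULL << 31) - 1
                                                    : (1ULL << 15) - 1;
      while (Offset) {
        uint64_t ThisVal = Offset > Chunk ? Chunk : Offset;
        int64_t Imm = IsSub ? -(int64_t)ThisVal : (int64_t)ThisVal;
        std::printf("  add r15, r15, %" PRId64 "\n", Imm); // placeholder mnemonic
        Offset -= ThisVal;
      }
    }

    int main() {
      emitSPUpdateSketch(-200000); // e.g. allocate a ~195 KiB frame
    }
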
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
deleted file mode 100644
index 1284b6802b3a..000000000000
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//==- SystemZFrameLowering.h - Define frame lowering for SystemZ -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ implementation of the TargetFrameLowering
-// class.
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZ_FRAMEINFO_H
-#define SYSTEMZ_FRAMEINFO_H
-
-#include "SystemZ.h"
-#include "SystemZSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/ADT/IndexedMap.h"
-
-namespace llvm {
- class SystemZSubtarget;
-
-class SystemZFrameLowering : public TargetFrameLowering {
- IndexedMap<unsigned> RegSpillOffsets;
-protected:
- const SystemZSubtarget &STI;
-
-public:
- explicit SystemZFrameLowering(const SystemZSubtarget &sti);
-
- /// emitPrologue/emitEpilogue - These methods insert prolog and epilog code
- /// into the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-
- bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
- bool hasFP(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-};
-
-} // End llvm namespace
-
-#endif
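
The RegSpillOffsets map declared above is filled by the constructor in the deleted SystemZFrameLowering.cpp with fixed register save-area slots: R2D at 0x10 and each following GPR 8 bytes higher, up to R15D at 0x78. A small sketch of that same mapping, assuming plain integers stand in for the register enum:

    #include <cassert>
    #include <cstdio>

    // Offset of a GPR's slot in the caller-allocated register save area,
    // matching the SpillOffsTab table in the deleted constructor.
    unsigned saveAreaOffset(unsigned GPRNumber) { // 2..15 for R2D..R15D
      assert(GPRNumber >= 2 && GPRNumber <= 15 && "no save-area slot");
      return 0x10 + 8 * (GPRNumber - 2);
    }

    int main() {
      std::printf("R6D  -> %#x\n", saveAreaOffset(6));  // 0x30
      std::printf("R15D -> %#x\n", saveAreaOffset(15)); // 0x78
    }
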
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
deleted file mode 100644
index 2186ff1fed54..000000000000
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ /dev/null
@@ -1,779 +0,0 @@
-//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the SystemZ target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValues
- /// instead of register numbers for the leaves of the matched tree.
- struct SystemZRRIAddressMode {
- enum {
- RegBase,
- FrameIndexBase
- } BaseType;
-
- struct { // This is really a union, discriminated by BaseType!
- SDValue Reg;
- int FrameIndex;
- } Base;
-
- SDValue IndexReg;
- int64_t Disp;
- bool isRI;
-
- SystemZRRIAddressMode(bool RI = false)
- : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
- }
-
- void dump() {
- errs() << "SystemZRRIAddressMode " << this << '\n';
- if (BaseType == RegBase) {
- errs() << "Base.Reg ";
- if (Base.Reg.getNode() != 0)
- Base.Reg.getNode()->dump();
- else
- errs() << "nul";
- errs() << '\n';
- } else {
- errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
- }
- if (!isRI) {
- errs() << "IndexReg ";
- if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
- else errs() << "nul";
- }
- errs() << " Disp " << Disp << '\n';
- }
- };
-}
-
-/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
-/// instructions for SelectionDAG operations.
-///
-namespace {
- class SystemZDAGToDAGISel : public SelectionDAGISel {
- const SystemZTargetLowering &Lowering;
- const SystemZSubtarget &Subtarget;
-
- void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp);
- void getAddressOperands(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp,
- SDValue &Index);
-
- public:
- SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
-
- virtual const char *getPassName() const {
- return "SystemZ DAG->DAG Pattern Instruction Selection";
- }
-
- /// getI8Imm - Return a target constant with the specified value, of type
- /// i8.
- inline SDValue getI8Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i8);
- }
-
- /// getI16Imm - Return a target constant with the specified value, of type
- /// i16.
- inline SDValue getI16Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i16);
- }
-
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- // Include the pieces autogenerated from the target description.
- #include "SystemZGenDAGISel.inc"
-
- private:
- bool SelectAddrRI12Only(SDValue& Addr,
- SDValue &Base, SDValue &Disp);
- bool SelectAddrRI12(SDValue& Addr,
- SDValue &Base, SDValue &Disp,
- bool is12BitOnly = false);
- bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
- bool SelectAddrRRI12(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectAddrRRI20(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectLAAddr(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
-
- SDNode *Select(SDNode *Node);
-
- bool TryFoldLoad(SDNode *P, SDValue N,
- SDValue &Base, SDValue &Disp, SDValue &Index);
-
- bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
- bool is12Bit, unsigned Depth = 0);
- bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
- };
-} // end anonymous namespace
-
-/// createSystemZISelDag - This pass converts a legalized DAG into a
-/// SystemZ-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new SystemZDAGToDAGISel(TM, OptLevel);
-}
-
-/// isImmSExt20 - This method tests whether the given value (either a 32-bit
-/// or 64-bit immediate) can be accurately represented as a sign extension
-/// from a 20-bit value. If so, this returns true and sets Imm to the
-/// immediate.
-static bool isImmSExt20(int64_t Val, int64_t &Imm) {
- if (Val >= -524288 && Val <= 524287) {
- Imm = Val;
- return true;
- }
- return false;
-}
-
-/// isImmZExt12 - This method tests whether the given value (either a 32-bit
-/// or 64-bit immediate) can be accurately represented as a zero extension
-/// from a 12-bit value. If so, this returns true and sets Imm to the
-/// immediate.
-static bool isImmZExt12(int64_t Val, int64_t &Imm) {
- if (Val >= 0 && Val <= 0xFFF) {
- Imm = Val;
- return true;
- }
- return false;
-}
-
-/// MatchAddress - Add the specified node to the specified addressing mode,
-/// returning true if it cannot be done. This just pattern matches for the
-/// addressing mode.
-bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
- bool is12Bit, unsigned Depth) {
- DebugLoc dl = N.getDebugLoc();
- DEBUG(errs() << "MatchAddress: "; AM.dump());
- // Limit recursion.
- if (Depth > 5)
- return MatchAddressBase(N, AM);
-
- // FIXME: We can do better here. If we have something like
- // (shift (add A, imm), N), we can try to reassociate and fold the shifted
- // imm into the addressing mode.
- switch (N.getOpcode()) {
- default: break;
- case ISD::Constant: {
- int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- int64_t Imm = 0;
- bool Match = (is12Bit ?
- isImmZExt12(AM.Disp + Val, Imm) :
- isImmSExt20(AM.Disp + Val, Imm));
- if (Match) {
- AM.Disp = Imm;
- return false;
- }
- break;
- }
-
- case ISD::FrameIndex:
- if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0) {
- AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
- AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
- return false;
- }
- break;
-
- case ISD::SUB: {
- // Given A-B, if A can be completely folded into the address, leaving
- // the index field unused, use -B as the index.
- // This is a win if A has multiple parts that can be folded into
- // the address. Also, this saves a mov if the base register has
- // other uses, since it avoids a two-address sub instruction; however,
- // it costs an additional mov if the index register has other uses.
-
- // Test if the LHS of the sub can be folded.
- SystemZRRIAddressMode Backup = AM;
- if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
- AM = Backup;
- break;
- }
- // Test if the index field is free for use.
- if (AM.IndexReg.getNode() || AM.isRI) {
- AM = Backup;
- break;
- }
-
- // If the base is a register with multiple uses, this transformation may
- // save a mov. Otherwise it's probably better not to do it.
- if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
- (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
- AM = Backup;
- break;
- }
-
- // Ok, the transformation is legal and appears profitable. Go for it.
- SDValue RHS = N.getNode()->getOperand(1);
- SDValue Zero = CurDAG->getConstant(0, N.getValueType());
- SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
- AM.IndexReg = Neg;
-
- // Insert the new nodes into the topological ordering.
- if (Zero.getNode()->getNodeId() == -1 ||
- Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Zero.getNode());
- Zero.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Neg.getNode()->getNodeId() == -1 ||
- Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Neg.getNode());
- Neg.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- return false;
- }
-
- case ISD::ADD: {
- SystemZRRIAddressMode Backup = AM;
- if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
- return false;
- AM = Backup;
- if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
- return false;
- AM = Backup;
-
- // If we couldn't fold both operands into the address at the same time,
- // see if we can just put each operand into a register and fold at least
- // the add.
- if (!AM.isRI &&
- AM.BaseType == SystemZRRIAddressMode::RegBase &&
- !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
- AM.Base.Reg = N.getNode()->getOperand(0);
- AM.IndexReg = N.getNode()->getOperand(1);
- return false;
- }
- break;
- }
-
- case ISD::OR:
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- SystemZRRIAddressMode Backup = AM;
- int64_t Offset = CN->getSExtValue();
- int64_t Imm = 0;
- bool MatchOffset = (is12Bit ?
- isImmZExt12(AM.Disp + Offset, Imm) :
- isImmSExt20(AM.Disp + Offset, Imm));
- // The resultant disp must fit in 12 or 20-bits.
- if (MatchOffset &&
- // LHS should be an addr mode.
- !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
- // Check to see if the LHS & C is zero.
- CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
- AM.Disp = Imm;
- return false;
- }
- AM = Backup;
- }
- break;
- }
-
- return MatchAddressBase(N, AM);
-}
-
-/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
-/// specified addressing mode without any further recursion.
-bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
- SystemZRRIAddressMode &AM) {
- // Is the base register already occupied?
- if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
- // If so, check to see if the index register is set.
- if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
- AM.IndexReg = N;
- return false;
- }
-
- // Otherwise, we cannot select it.
- return true;
- }
-
- // Default, generate it as a register.
- AM.BaseType = SystemZRRIAddressMode::RegBase;
- AM.Base.Reg = N;
- return false;
-}
-
-void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp) {
- if (AM.BaseType == SystemZRRIAddressMode::RegBase)
- Base = AM.Base.Reg;
- else
- Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
- Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
-}
-
-void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp,
- SDValue &Index) {
- getAddressOperandsRI(AM, Base, Disp);
- Index = AM.IndexReg;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
- SDValue &Base, SDValue &Disp) {
- return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
-}
-
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
- SDValue &Base, SDValue &Disp,
- bool is12BitOnly) {
- SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an addressing computation is
- // used as an address by all of its uses. But watch out for CopyToReg uses:
- // they mean the address computation is live-out and will be computed by an
- // LA, so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM12);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
- return false;
-
- // Check whether we can match the address using 20-bit displacements.
- if (!Done && !is12BitOnly &&
- !MatchAddress(Addr, AM20, /* is12Bit */ false))
- if (AM12.Disp == 0 && AM20.Disp != 0)
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
-
- EVT VT = Addr.getValueType();
- if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM12.Base.Reg.getNode())
- AM12.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
-
- getAddressOperandsRI(AM12, Base, Disp);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
- SDValue &Base, SDValue &Disp) {
- SystemZRRIAddressMode AM(/*isRI*/true);
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an addressing computation is
- // used as an address by all of its uses. But watch out for CopyToReg uses:
- // they mean the address computation is live-out and will be computed by an
- // LA, so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM.dump());
-
- EVT VT = Addr.getValueType();
- if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
-
- getAddressOperandsRI(AM, Base, Disp);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM20, AM12;
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an addressing computation is
- // used as an address by all of its uses. But watch out for CopyToReg uses:
- // they mean the address computation is live-out and will be computed by an
- // LA, so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM12);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
- return false;
-
- // Check whether we can match the address using 20-bit displacements.
- if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
- if (AM12.Disp == 0 && AM20.Disp != 0)
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
-
- EVT VT = Addr.getValueType();
- if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM12.Base.Reg.getNode())
- AM12.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- if (!AM12.IndexReg.getNode())
- AM12.IndexReg = CurDAG->getRegister(0, VT);
-
- getAddressOperands(AM12, Base, Disp, Index);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM;
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an addressing computation is
- // used as an address by all of its uses. But watch out for CopyToReg uses:
- // they mean the address computation is live-out and will be computed by an
- // LA, so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM.dump());
-
- EVT VT = Addr.getValueType();
- if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- if (!AM.IndexReg.getNode())
- AM.IndexReg = CurDAG->getRegister(0, VT);
-
- getAddressOperands(AM, Base, Disp, Index);
-
- return true;
-}
-
-/// SelectLAAddr - Match the address and determine if the maximal addressing
-/// mode it matches can be cost-effectively emitted as an LA/LAY instruction.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM;
-
- if (MatchAddress(Addr, AM, false))
- return false;
-
- EVT VT = Addr.getValueType();
- unsigned Complexity = 0;
- if (AM.BaseType == SystemZRRIAddressMode::RegBase)
- if (AM.Base.Reg.getNode())
- Complexity = 1;
- else
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
- Complexity = 4;
-
- if (AM.IndexReg.getNode())
- Complexity += 1;
- else
- AM.IndexReg = CurDAG->getRegister(0, VT);
-
- if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
- Complexity += 1;
-
- if (Complexity > 2) {
- getAddressOperands(AM, Base, Disp, Index);
- return true;
- }
-
- return false;
-}
-
-bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- if (ISD::isNON_EXTLoad(N.getNode()) &&
- IsLegalToFold(N, P, P, OptLevel))
- return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
- return false;
-}
-
-SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
- EVT NVT = Node->getValueType(0);
- DebugLoc dl = Node->getDebugLoc();
- unsigned Opcode = Node->getOpcode();
-
- // Dump information about the Node being selected
- DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- return NULL; // Already selected.
- }
-
- switch (Opcode) {
- default: break;
- case ISD::SDIVREM: {
- unsigned Opc, MOpc;
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- EVT ResVT;
- bool is32Bit = false;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
- ResVT = MVT::v2i64;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
- ResVT = MVT::v2i64;
- break;
- }
-
- SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
-
- // Prepare the dividend
- SDNode *Dividend;
- if (is32Bit)
- Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
- else
- Dividend = N0.getNode();
-
- // Insert prepared dividend into suitable 'subreg'
- SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, ResVT);
- Dividend =
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
- SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
-
- SDNode *Result;
- SDValue DivVal = SDValue(Dividend, 0);
- if (foldedLoad) {
- SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
- Ops, array_lengthof(Ops));
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 1));
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
- }
-
- // Copy the division (odd subreg) result, if it is needed.
- if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
-
- ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- // Copy the remainder (even subreg) result, if it is needed.
- if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_32bit : SystemZ::subreg_even);
- SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
-
- ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- return NULL;
- }
- case ISD::UDIVREM: {
- unsigned Opc, MOpc, ClrOpc;
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- EVT ResVT;
-
- bool is32Bit = false;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
- ClrOpc = SystemZ::MOV64Pr0_even;
- ResVT = MVT::v2i32;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
- ClrOpc = SystemZ::MOV128r0_even;
- ResVT = MVT::v2i64;
- break;
- }
-
- SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
-
- // Prepare the dividend
- SDNode *Dividend = N0.getNode();
-
- // Insert prepared dividend into suitable 'subreg'
- SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, ResVT);
- {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- Dividend =
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
- SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
- }
-
- // Zero out even subreg
- Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
-
- SDValue DivVal = SDValue(Dividend, 0);
- SDNode *Result;
- if (foldedLoad) {
- SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
- Ops, array_lengthof(Ops));
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 1));
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
- }
-
- // Copy the division (odd subreg) result, if it is needed.
- if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
- ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- // Copy the remainder (even subreg) result, if it is needed.
- if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_32bit : SystemZ::subreg_even);
- SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
- ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- return NULL;
- }
- }
-
- // Select the default instruction
- SDNode *ResNode = SelectCode(Node);
-
- DEBUG(errs() << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- errs() << "\n";
- );
- return ResNode;
-}
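
A brief standalone restatement (not from the patch) of the displacement range checks that drive the deleted address matcher: the RI12 forms take an unsigned 12-bit displacement, the RI20 forms a signed 20-bit one, and a constant is only folded into an existing displacement if the combined value still fits the chosen field, as in the ISD::Constant case of MatchAddress() above.

    #include <cstdint>
    #include <iostream>

    bool fitsUnsigned12(int64_t D) { return D >= 0 && D <= 0xFFF; }        // 12-bit form
    bool fitsSigned20(int64_t D)   { return D >= -524288 && D <= 524287; } // 20-bit form

    // Fold the constant into the running displacement only if the sum still encodes.
    bool canFoldDisp(int64_t CurDisp, int64_t Add, bool Is12Bit) {
      int64_t NewDisp = CurDisp + Add;
      return Is12Bit ? fitsUnsigned12(NewDisp) : fitsSigned20(NewDisp);
    }

    int main() {
      std::cout << canFoldDisp(0, 4000, /*Is12Bit=*/true) << '\n';    // 1: fits 12 bits
      std::cout << canFoldDisp(0, 600000, /*Is12Bit=*/false) << '\n'; // 0: exceeds 20 bits
    }
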
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
deleted file mode 100644
index 48ca99ff9ea2..000000000000
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ /dev/null
@@ -1,868 +0,0 @@
-//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "systemz-lower"
-
-#include "SystemZISelLowering.h"
-#include "SystemZ.h"
-#include "SystemZTargetMachine.h"
-#include "SystemZSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
-using namespace llvm;
-
-SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
- TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()), TM(tm) {
-
- RegInfo = TM.getRegisterInfo();
-
- // Set up the register classes.
- addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
- addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
- addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
- addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
-
- if (!UseSoftFloat) {
- addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
- addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
- }
-
- // Compute derived properties from the register classes
- computeRegisterProperties();
-
- // Provide all sorts of operation actions
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setStackPointerRegisterToSaveRestore(SystemZ::R15D);
-
- // TODO: It may be better to default to latency-oriented scheduling; however,
- // LLVM's current latency-oriented scheduler can't handle physreg definitions
- // such as SystemZ's PSW, so set this to the register-pressure scheduler,
- // which can handle them.
- setSchedulingPreference(Sched::RegPressure);
-
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
-
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::i64, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f64, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::i32, Promote);
- setOperationAction(ISD::CTLZ, MVT::i64, Legal);
-
- // FIXME: Can we lower these 2 efficiently?
- setOperationAction(ISD::SETCC, MVT::i32, Expand);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
- setOperationAction(ISD::SETCC, MVT::f32, Expand);
- setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
- // FIXME: Can we support these natively?
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
-
- // Lower some FP stuff
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
-
- // We have only 64-bit bitconverts
- setOperationAction(ISD::BITCAST, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i32, Expand);
-
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- setMinFunctionAlignment(1);
-}
-
-SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- default:
- llvm_unreachable("Should not custom lower this!");
- return SDValue();
- }
-}
-
-bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
- return false;
-
- // +0.0 lzer
- // +0.0f lzdr
- // -0.0 lzer + lner
- // -0.0f lzdr + lndr
- return Imm.isZero() || Imm.isNegZero();
-}
-
-//===----------------------------------------------------------------------===//
-// SystemZ Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-TargetLowering::ConstraintType
-SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- return C_RegisterClass;
- default:
- break;
- }
- }
- return TargetLowering::getConstraintType(Constraint);
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SystemZTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() == 1) {
- // GCC Constraint Letters
- switch (Constraint[0]) {
- default: break;
- case 'r': // GENERAL_REGS
- if (VT == MVT::i32)
- return std::make_pair(0U, SystemZ::GR32RegisterClass);
- else if (VT == MVT::i128)
- return std::make_pair(0U, SystemZ::GR128RegisterClass);
-
- return std::make_pair(0U, SystemZ::GR64RegisterClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "SystemZGenCallingConv.inc"
-
-SDValue
-SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention");
- case CallingConv::C:
- case CallingConv::Fast:
- return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
- }
-}
-
-SDValue
-SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // SystemZ target does not yet support tail call optimization.
- isTailCall = false;
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- case CallingConv::C:
- return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, OutVals, Ins, dl, DAG, InVals);
- }
-}
-
-/// LowerCCCArguments - Transform physical registers into virtual registers and
-/// generate load operations for arguments placed on the stack.
-// FIXME: struct return stuff
-// FIXME: varargs
-SDValue
-SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
-
- if (isVarArg)
- report_fatal_error("Varargs not supported yet");
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue ArgValue;
- CCValAssign &VA = ArgLocs[i];
- EVT LocVT = VA.getLocVT();
- if (VA.isRegLoc()) {
- // Arguments passed in registers
- TargetRegisterClass *RC;
- switch (LocVT.getSimpleVT().SimpleTy) {
- default:
-#ifndef NDEBUG
- errs() << "LowerFormalArguments Unhandled argument type: "
- << LocVT.getSimpleVT().SimpleTy
- << "\n";
-#endif
- llvm_unreachable(0);
- case MVT::i64:
- RC = SystemZ::GR64RegisterClass;
- break;
- case MVT::f32:
- RC = SystemZ::FP32RegisterClass;
- break;
- case MVT::f64:
- RC = SystemZ::FP64RegisterClass;
- break;
- }
-
- unsigned VReg = RegInfo.createVirtualRegister(RC);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
- } else {
- // Sanity check
- assert(VA.isMemLoc());
-
- // Create the nodes corresponding to a load from this parameter slot.
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- // from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
- }
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- InVals.push_back(ArgValue);
- }
-
- return Chain;
-}
-
-/// LowerCCCCallTo - Function arguments are copied from virtual registers to
-/// physical registers or the stack frame; CALLSEQ_START and CALLSEQ_END are emitted.
-/// TODO: sret.
-SDValue
-SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
-
- // Offset to first argument stack slot.
- const unsigned FirstArgOffset = 160;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true));
-
- SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
- SmallVector<SDValue, 12> MemOpChains;
- SDValue StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
- // Arguments that can be passed in a register must be kept in the RegsToPass
- // vector.
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
-
- if (StackPtr.getNode() == 0)
- StackPtr =
- DAG.getCopyFromReg(Chain, dl,
- (TFI->hasFP(MF) ?
- SystemZ::R11D : SystemZ::R15D),
- getPointerTy());
-
- unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
- SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- StackPtr,
- DAG.getIntPtrConstant(Offset));
-
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- }
- }
-
- // Combine all the store nodes into a single TokenFactor node, since the
- // stores are independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- // Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
- InFlag);
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
- DAG, InVals);
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-///
-SDValue
-SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
-
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- VA.getLocVT(), InFlag).getValue(1);
- SDValue RetValue = Chain.getValue(0);
- InFlag = Chain.getValue(2);
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
-
- InVals.push_back(RetValue);
- }
-
- return Chain;
-}
-
-
-SDValue
-SystemZTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- // CCValAssign - represent the assignment of the return value to a location
- SmallVector<CCValAssign, 16> RVLocs;
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
-  // Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- SDValue ResValue = OutVals[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
-    // If this is an 8/16/32-bit value, it should really be passed promoted
-    // to 64 bits.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
- else if (VA.getLocInfo() == CCValAssign::AExt)
- ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
-
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
-
-    // Guarantee that all emitted copies are stuck together by threading the
-    // flag through them, so nothing can be scheduled in between.
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode())
- return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
-
- // Return Void
- return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
-}
-
-SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG) const {
- // FIXME: Emit a test if RHS is zero
-
- bool isUnsigned = false;
- SystemZCC::CondCodes TCC;
- switch (CC) {
- default:
- llvm_unreachable("Invalid integer condition!");
- case ISD::SETEQ:
- case ISD::SETOEQ:
- TCC = SystemZCC::E;
- break;
- case ISD::SETUEQ:
- TCC = SystemZCC::NLH;
- break;
- case ISD::SETNE:
- case ISD::SETONE:
- TCC = SystemZCC::NE;
- break;
- case ISD::SETUNE:
- TCC = SystemZCC::LH;
- break;
- case ISD::SETO:
- TCC = SystemZCC::O;
- break;
- case ISD::SETUO:
- TCC = SystemZCC::NO;
- break;
- case ISD::SETULE:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NH;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETLE:
- case ISD::SETOLE:
- TCC = SystemZCC::LE;
- break;
- case ISD::SETUGE:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NL;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETGE:
- case ISD::SETOGE:
- TCC = SystemZCC::HE;
- break;
- case ISD::SETUGT:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NLE;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETGT:
- case ISD::SETOGT:
- TCC = SystemZCC::H;
- break;
- case ISD::SETULT:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NHE;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETLT:
- case ISD::SETOLT:
- TCC = SystemZCC::L;
- break;
- }
-
- SystemZCC = DAG.getConstant(TCC, MVT::i32);
-
- DebugLoc dl = LHS.getDebugLoc();
- return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- dl, MVT::i64, LHS, RHS);
-}
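// Worked example (sketch, not taken from the deleted file): for an unsigned
// 64-bit "less than", i.e. CC == ISD::SETULT with integer operands, the
// switch above sets isUnsigned = true and falls through to the SETLT case,
// so TCC = SystemZCC::L; EmitCmp then returns a SystemZISD::UCMP (logical
// compare) node whose glue result is what LowerBR_CC / LowerSELECT_CC feed
// into the BRCOND / SELECT nodes below.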
-
-
-SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue SystemZCC;
- SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
- return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, SystemZCC, Flag);
-}
-
-SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TrueV = Op.getOperand(2);
- SDValue FalseV = Op.getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue SystemZCC;
- SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(TrueV);
- Ops.push_back(FalseV);
- Ops.push_back(SystemZCC);
- Ops.push_back(Flag);
-
- return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
-}
-
-SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
-
- bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- bool ExtraLoadRequired =
- Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
-
- SDValue Result;
- if (!IsPic && !ExtraLoadRequired) {
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- Offset = 0;
- } else {
- unsigned char OpFlags = 0;
- if (ExtraLoadRequired)
- OpFlags = SystemZII::MO_GOTENT;
-
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
- }
-
- Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
- getPointerTy(), Result);
-
- if (ExtraLoadRequired)
- Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
-
- // If there was a non-zero offset that we didn't fold, create an explicit
- // addition for it.
- if (Offset != 0)
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
- DAG.getConstant(Offset, getPointerTy()));
-
- return Result;
-}
-
-// FIXME: PIC here
-SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
-
- return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
-}
-
-
-// FIXME: PIC here
-// FIXME: This is just a dirty hack. We need to lower the constant pool properly
-SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-
- SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
- CP->getAlignment(),
- CP->getOffset());
-
- return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
-}
-
-const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
- case SystemZISD::CALL: return "SystemZISD::CALL";
- case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
- case SystemZISD::CMP: return "SystemZISD::CMP";
- case SystemZISD::UCMP: return "SystemZISD::UCMP";
- case SystemZISD::SELECT: return "SystemZISD::SELECT";
- case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
- default: return NULL;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-MachineBasicBlock*
-SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const SystemZInstrInfo &TII = *TM.getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- assert((MI->getOpcode() == SystemZ::Select32 ||
- MI->getOpcode() == SystemZ::SelectF32 ||
- MI->getOpcode() == SystemZ::Select64 ||
- MI->getOpcode() == SystemZ::SelectF64) &&
- "Unexpected instr type to insert");
-
- // To "insert" a SELECT instruction, we actually have to insert the diamond
- // control-flow pattern. The incoming instruction knows the destination vreg
- // to set, the condition code register to branch on, the true/false values to
- // select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // jCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
- SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
- F->insert(I, copy0MBB);
- F->insert(I, copy1MBB);
- // Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
- copy1MBB->splice(copy1MBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(copy1MBB);
-
- BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to copy1MBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(copy1MBB);
-
- // copy1MBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = copy1MBB;
- BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
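The hunk above removes the individual custom-lowering hooks (LowerBR_CC,
LowerSELECT_CC, LowerGlobalAddress, LowerJumpTable, LowerConstantPool) but not
the LowerOperation dispatcher declared in the header that follows. For
orientation only, a minimal sketch of the conventional dispatcher these hooks
plug into (not necessarily the deleted file's exact body):

SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::JumpTable:     return LowerJumpTable(Op, DAG);
  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
  default:
    llvm_unreachable("Should not custom lower this!");
  }
}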
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
deleted file mode 100644
index bab3dc23eead..000000000000
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ /dev/null
@@ -1,145 +0,0 @@
-//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that SystemZ uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
-#define LLVM_TARGET_SystemZ_ISELLOWERING_H
-
-#include "SystemZ.h"
-#include "SystemZRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
- namespace SystemZISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// Return with a flag operand. Operand 0 is the chain operand.
- RET_FLAG,
-
-    /// CALL - These operations represent an abstract call instruction, which
-    /// carries the callee, the argument registers and the chain/glue operands.
- CALL,
-
- /// PCRelativeWrapper - PC relative address
- PCRelativeWrapper,
-
- /// CMP, UCMP - Compare instruction
- CMP,
- UCMP,
-
-    /// BRCOND - Conditional branch. Operand 0 is the chain operand, operand 1
-    /// is the block to branch to if the condition is true, operand 2 is the
-    /// condition code and operand 3 is the flag operand produced by a CMP
-    /// instruction.
- BRCOND,
-
-    /// SELECT - Operands 0 and 1 are the true and false values to select
-    /// between, operand 2 is the condition code and operand 3 is the flag
-    /// operand.
- SELECT
- };
- }
-
- class SystemZSubtarget;
- class SystemZTargetMachine;
-
- class SystemZTargetLowering : public TargetLowering {
- public:
- explicit SystemZTargetLowering(SystemZTargetMachine &TM);
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
-
- /// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// getTargetNodeName - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- TargetLowering::ConstraintType
- getConstraintType(const std::string &Constraint) const;
-
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue EmitCmp(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG) const;
-
-
- MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- /// isFPImmLegal - Returns true if the target can instruction select the
- /// specified FP immediate natively. If false, the legalizer will
- /// materialize the FP immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-
- private:
- SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- SDValue LowerCCCArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- const SystemZSubtarget &Subtarget;
- const SystemZTargetMachine &TM;
- const SystemZRegisterInfo *RegInfo;
- };
-} // namespace llvm
-
-#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
deleted file mode 100644
index ab45ec5984e3..000000000000
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes functions that may be used with BuildMI from the
-// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
-//
-// The helpers below may be used with the BuildMI function to add entire
-// memory references in a single, typed, function call.
-//
-// For reference, the order of operands for memory references is:
-// (Operand), Base, Displacement, Index.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZINSTRBUILDER_H
-#define SYSTEMZINSTRBUILDER_H
-
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-
-namespace llvm {
-
-/// SystemZAddressMode - This struct holds a generalized full SystemZ address
-/// mode. The base register can be a frame index, which will eventually be
-/// replaced with R15 or R11, with Disp adjusted accordingly.
-struct SystemZAddressMode {
- enum {
- RegBase,
- FrameIndexBase
- } BaseType;
-
- union {
- unsigned Reg;
- int FrameIndex;
- } Base;
-
- unsigned IndexReg;
- int32_t Disp;
- const GlobalValue *GV;
-
- SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
- Base.Reg = 0;
- }
-};
-
-/// addDirectMem - This function is used to add a direct memory reference to the
-/// current instruction -- that is, a dereference of an address in a register,
-/// with no index or displacement.
-///
-static inline const MachineInstrBuilder &
-addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
- // Because memory references are always represented with 3
- // values, this adds: Reg, [0, NoReg] to the instruction.
- return MIB.addReg(Reg).addImm(0).addReg(0);
-}
-
-static inline const MachineInstrBuilder &
-addOffset(const MachineInstrBuilder &MIB, int Offset) {
- return MIB.addImm(Offset).addReg(0);
-}
-
-/// addRegOffset - This function is used to add a memory reference of the form
-/// [Reg + Offset], i.e., one with no index, but with a
-/// displacement. An example is: 10(%r15).
-///
-static inline const MachineInstrBuilder &
-addRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill, int Offset) {
- return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
-}
-
-/// addRegReg - This function is used to add a memory reference of the form:
-/// [Reg + Reg].
-static inline const MachineInstrBuilder &
-addRegReg(const MachineInstrBuilder &MIB,
- unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
- return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
- .addReg(Reg2, getKillRegState(isKill2));
-}
-
-static inline const MachineInstrBuilder &
-addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
- if (AM.BaseType == SystemZAddressMode::RegBase)
- MIB.addReg(AM.Base.Reg);
- else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
- MIB.addFrameIndex(AM.Base.FrameIndex);
- else
- assert(0);
-
- return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
-}
-
-/// addFrameReference - This function is used to add a reference to the base of
-/// an abstract object on the stack frame of the current function. The
-/// reference uses the frame index as its base register until the index is
-/// resolved to a concrete stack address. A constant offset can be specified
-/// as well.
-///
-static inline const MachineInstrBuilder &
-addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
- MachineInstr *MI = MIB;
- MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- const MCInstrDesc &MCID = MI->getDesc();
- unsigned Flags = 0;
- if (MCID.mayLoad())
- Flags |= MachineMemOperand::MOLoad;
- if (MCID.mayStore())
- Flags |= MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI), Offset),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
- return addOffset(MIB.addFrameIndex(FI), Offset)
- .addMemOperand(MMO);
-}
-
-} // End llvm namespace
-
-#endif
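The helpers above are meant to be composed with BuildMI. A minimal usage
sketch, assuming hypothetical MBB, MI, DL, TII, SrcReg, DestReg, FrameIdx and
isKill are in scope; the spill form mirrors storeRegToStackSlot further down
in this diff:

// Spill a 64-bit GPR to the stack slot FrameIdx.
addFrameReference(BuildMI(MBB, MI, DL, TII.get(SystemZ::MOV64mr)), FrameIdx)
  .addReg(SrcReg, getKillRegState(isKill));

// Load DestReg from a base-plus-displacement address such as 16(%r15).
addRegOffset(BuildMI(MBB, MI, DL, TII.get(SystemZ::MOV64rm), DestReg),
             SystemZ::R15D, /*isKill=*/false, 16);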
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
deleted file mode 100644
index a65828061d3b..000000000000
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ /dev/null
@@ -1,340 +0,0 @@
-//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the SystemZ (binary) floating point instructions in
-// TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// FIXME: multiclassify!
-
-//===----------------------------------------------------------------------===//
-// FP Pattern fragments
-
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-def fpimmneg0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(-0.0);
-}]>;
-
-let Uses = [PSW], usesCustomInserter = 1 in {
- def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
- "# SelectF32 PSEUDO",
- [(set FP32:$dst,
- (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
- def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
- "# SelectF64 PSEUDO",
- [(set FP64:$dst,
- (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Move Instructions
-
-// Floating point constant loads.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
- "lzer\t{$dst}",
- [(set FP32:$dst, fpimm0)]>;
-def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
- "lzdr\t{$dst}",
- [(set FP64:$dst, fpimm0)]>;
-}
-
-let neverHasSideEffects = 1 in {
-def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "ler\t{$dst, $src}",
- []>;
-def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "ldr\t{$dst, $src}",
- []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
- "le\t{$dst, $src}",
- [(set FP32:$dst, (load rriaddr12:$src))]>;
-def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
- "ley\t{$dst, $src}",
- [(set FP32:$dst, (load rriaddr:$src))]>;
-def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "ld\t{$dst, $src}",
- [(set FP64:$dst, (load rriaddr12:$src))]>;
-def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
- "ldy\t{$dst, $src}",
- [(set FP64:$dst, (load rriaddr:$src))]>;
-}
-
-def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
- "ste\t{$src, $dst}",
- [(store FP32:$src, rriaddr12:$dst)]>;
-def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
- "stey\t{$src, $dst}",
- [(store FP32:$src, rriaddr:$dst)]>;
-def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
- "std\t{$src, $dst}",
- [(store FP64:$src, rriaddr12:$dst)]>;
-def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
- "stdy\t{$src, $dst}",
- [(store FP64:$src, rriaddr:$dst)]>;
-
-def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "cpsdr\t{$dst, $src2, $src1}",
- [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
-def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "cpsdr\t{$dst, $src2, $src1}",
- [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions
-
-
-let Defs = [PSW] in {
-def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lcebr\t{$dst, $src}",
- [(set FP32:$dst, (fneg FP32:$src)),
- (implicit PSW)]>;
-def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lcdbr\t{$dst, $src}",
- [(set FP64:$dst, (fneg FP64:$src)),
- (implicit PSW)]>;
-
-def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lpebr\t{$dst, $src}",
- [(set FP32:$dst, (fabs FP32:$src)),
- (implicit PSW)]>;
-def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lpdbr\t{$dst, $src}",
- [(set FP64:$dst, (fabs FP64:$src)),
- (implicit PSW)]>;
-
-def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lnebr\t{$dst, $src}",
- [(set FP32:$dst, (fneg(fabs FP32:$src))),
- (implicit PSW)]>;
-def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lndbr\t{$dst, $src}",
- [(set FP64:$dst, (fneg(fabs FP64:$src))),
- (implicit PSW)]>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Defs = [PSW] in {
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "aebr\t{$dst, $src2}",
- [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
- (implicit PSW)]>;
-def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "adbr\t{$dst, $src2}",
- [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
- (implicit PSW)]>;
-}
-
-def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "aeb\t{$dst, $src2}",
- [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "adb\t{$dst, $src2}",
- [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-
-def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "sebr\t{$dst, $src2}",
- [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
- (implicit PSW)]>;
-def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "sdbr\t{$dst, $src2}",
- [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
- (implicit PSW)]>;
-
-def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "seb\t{$dst, $src2}",
- [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "sdb\t{$dst, $src2}",
- [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
-def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "meebr\t{$dst, $src2}",
- [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
-def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "mdbr\t{$dst, $src2}",
- [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
-}
-
-def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "meeb\t{$dst, $src2}",
- [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
-def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "mdb\t{$dst, $src2}",
- [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
-
-def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
- "maebr\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
- FP32:$src1))]>;
-def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
- "maeb\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
- FP32:$src3),
- FP32:$src1))]>;
-
-def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
- "madbr\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
- FP64:$src1))]>;
-def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
- "madb\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
- FP64:$src3),
- FP64:$src1))]>;
-
-def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
- "msebr\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
- FP32:$src1))]>;
-def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
- "mseb\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
- FP32:$src3),
- FP32:$src1))]>;
-
-def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
- "msdbr\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
- FP64:$src1))]>;
-def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
- "msdb\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
- FP64:$src3),
- FP64:$src1))]>;
-
-def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "debr\t{$dst, $src2}",
- [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
-def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "ddbr\t{$dst, $src2}",
- [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
-
-def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "deb\t{$dst, $src2}",
- [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
-def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "ddb\t{$dst, $src2}",
- [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "sqebr\t{$dst, $src}",
- [(set FP32:$dst, (fsqrt FP32:$src))]>;
-def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "sqdbr\t{$dst, $src}",
- [(set FP64:$dst, (fsqrt FP64:$src))]>;
-
-def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
- "sqeb\t{$dst, $src}",
- [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
-def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "sqdb\t{$dst, $src}",
- [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
-
-def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
- "ledbr\t{$dst, $src}",
- [(set FP32:$dst, (fround FP64:$src))]>;
-
-def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
- "ldebr\t{$dst, $src}",
- [(set FP64:$dst, (fextend FP32:$src))]>;
-def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "ldeb\t{$dst, $src}",
- [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
-
-let Defs = [PSW] in {
-def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
- "cefbr\t{$dst, $src}",
- [(set FP32:$dst, (sint_to_fp GR32:$src)),
- (implicit PSW)]>;
-def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
- "cegbr\t{$dst, $src}",
- [(set FP32:$dst, (sint_to_fp GR64:$src)),
- (implicit PSW)]>;
-
-def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
- "cdfbr\t{$dst, $src}",
- [(set FP64:$dst, (sint_to_fp GR32:$src)),
- (implicit PSW)]>;
-def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
- "cdgbr\t{$dst, $src}",
- [(set FP64:$dst, (sint_to_fp GR64:$src)),
- (implicit PSW)]>;
-
-def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
- "cfebr\t{$dst, 5, $src}",
- [(set GR32:$dst, (fp_to_sint FP32:$src)),
- (implicit PSW)]>;
-def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
- "cfdbr\t{$dst, 5, $src}",
- [(set GR32:$dst, (fp_to_sint FP64:$src)),
- (implicit PSW)]>;
-
-def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
- "cgebr\t{$dst, 5, $src}",
- [(set GR64:$dst, (fp_to_sint FP32:$src)),
- (implicit PSW)]>;
-def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
- "cgdbr\t{$dst, 5, $src}",
- [(set GR64:$dst, (fp_to_sint FP64:$src)),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
- "lgdr\t{$dst, $src}",
- [(set GR64:$dst, (bitconvert FP64:$src))]>;
-def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
- "ldgr\t{$dst, $src}",
- [(set FP64:$dst, (bitconvert GR64:$src))]>;
-
-//===----------------------------------------------------------------------===//
-// Compare instructions (set PSW but do not produce any other result)
-
-// Floating-point comparisons
-let Defs = [PSW] in {
-def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
- "cebr\t$src1, $src2",
- [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
-def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
- "cdbr\t$src1, $src2",
- [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
-
-def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
- "ceb\t$src1, $src2",
- [(set PSW, (SystemZcmp FP32:$src1,
- (load rriaddr12:$src2)))]>;
-def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
- "cdb\t$src1, $src2",
- [(set PSW, (SystemZcmp FP64:$src1,
- (load rriaddr12:$src2)))]>;
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Floating point constant -0.0
-def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
-def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
deleted file mode 100644
index b4a8993c1971..000000000000
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ /dev/null
@@ -1,133 +0,0 @@
-//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<5> val> {
- bits<5> Value = val;
-}
-
-def Pseudo : Format<0>;
-def EForm : Format<1>;
-def IForm : Format<2>;
-def RIForm : Format<3>;
-def RIEForm : Format<4>;
-def RILForm : Format<5>;
-def RISForm : Format<6>;
-def RRForm : Format<7>;
-def RREForm : Format<8>;
-def RRFForm : Format<9>;
-def RRRForm : Format<10>;
-def RRSForm : Format<11>;
-def RSForm : Format<12>;
-def RSIForm : Format<13>;
-def RSILForm : Format<14>;
-def RSYForm : Format<15>;
-def RXForm : Format<16>;
-def RXEForm : Format<17>;
-def RXFForm : Format<18>;
-def RXYForm : Format<19>;
-def SForm : Format<20>;
-def SIForm : Format<21>;
-def SILForm : Format<22>;
-def SIYForm : Format<23>;
-def SSForm : Format<24>;
-def SSEForm : Format<25>;
-def SSFForm : Format<26>;
-
-class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
- let Namespace = "SystemZ";
-
- bits<16> Opcode = op;
-
- Format Form = f;
- bits<5> FormBits = Form.Value;
-
- dag OutOperandList = outs;
- dag InOperandList = ins;
-}
-
-class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<0, f, outs, ins> {
- let Opcode{0-7} = op;
- let Opcode{8-15} = 0;
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<0, f, outs, ins> {
- let Opcode{0-11} = op;
- let Opcode{12-15} = 0;
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<op, f, outs, ins> {
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RRForm, outs, ins, asmstr, pattern>;
-
-class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I12<op, RIForm, outs, ins, asmstr, pattern>;
-
-class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I12<op, RILForm, outs, ins, asmstr, pattern>;
-
-class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RREForm, outs, ins, asmstr, pattern>;
-
-class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RXForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RXYForm, outs, ins, asmstr, pattern>;
-
-class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RSForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RSYForm, outs, ins, asmstr, pattern>;
-
-class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, SIForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, SIYForm, outs, ins, asmstr, pattern>;
-
-class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, SILForm, outs, ins, asmstr, pattern>;
-
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstSystemZ<0, Pseudo, outs, ins> {
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
deleted file mode 100644
index 5f3dd80f2d90..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ /dev/null
@@ -1,439 +0,0 @@
-//===- SystemZInstrInfo.cpp - SystemZ Instruction Information ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_CTOR
-#include "SystemZGenInstrInfo.inc"
-
-using namespace llvm;
-
-SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
- : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
- RI(tm, *this), TM(tm) {
-}
-
-/// isGVStub - Return true if the GV requires an extra load to get the
-/// real address.
-static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
- return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
-}
-
-void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- unsigned Opc = 0;
- if (RC == &SystemZ::GR32RegClass ||
- RC == &SystemZ::ADDR32RegClass)
- Opc = SystemZ::MOV32mr;
- else if (RC == &SystemZ::GR64RegClass ||
- RC == &SystemZ::ADDR64RegClass) {
- Opc = SystemZ::MOV64mr;
- } else if (RC == &SystemZ::FP32RegClass) {
- Opc = SystemZ::FMOV32mr;
- } else if (RC == &SystemZ::FP64RegClass) {
- Opc = SystemZ::FMOV64mr;
- } else if (RC == &SystemZ::GR64PRegClass) {
- Opc = SystemZ::MOV64Pmr;
- } else if (RC == &SystemZ::GR128RegClass) {
- Opc = SystemZ::MOV128mr;
- } else
- llvm_unreachable("Unsupported regclass to store");
-
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
- .addReg(SrcReg, getKillRegState(isKill));
-}
-
-void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const{
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- unsigned Opc = 0;
- if (RC == &SystemZ::GR32RegClass ||
- RC == &SystemZ::ADDR32RegClass)
- Opc = SystemZ::MOV32rm;
- else if (RC == &SystemZ::GR64RegClass ||
- RC == &SystemZ::ADDR64RegClass) {
- Opc = SystemZ::MOV64rm;
- } else if (RC == &SystemZ::FP32RegClass) {
- Opc = SystemZ::FMOV32rm;
- } else if (RC == &SystemZ::FP64RegClass) {
- Opc = SystemZ::FMOV64rm;
- } else if (RC == &SystemZ::GR64PRegClass) {
- Opc = SystemZ::MOV64Prm;
- } else if (RC == &SystemZ::GR128RegClass) {
- Opc = SystemZ::MOV128rm;
- } else
- llvm_unreachable("Unsupported regclass to load");
-
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
-}
-
-void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc;
- if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV64rr;
- else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV32rr;
- else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV64rrP;
- else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV128rr;
- else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::FMOV32rr;
- else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::FMOV64rr;
- else
- llvm_unreachable("Impossible reg-to-reg copy");
-
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SystemZ::MOV32rm:
- case SystemZ::MOV32rmy:
- case SystemZ::MOV64rm:
- case SystemZ::MOVSX32rm8:
- case SystemZ::MOVSX32rm16y:
- case SystemZ::MOVSX64rm8:
- case SystemZ::MOVSX64rm16:
- case SystemZ::MOVSX64rm32:
- case SystemZ::MOVZX32rm8:
- case SystemZ::MOVZX32rm16:
- case SystemZ::MOVZX64rm8:
- case SystemZ::MOVZX64rm16:
- case SystemZ::MOVZX64rm32:
- case SystemZ::FMOV32rm:
- case SystemZ::FMOV32rmy:
- case SystemZ::FMOV64rm:
- case SystemZ::FMOV64rmy:
- case SystemZ::MOV64Prm:
- case SystemZ::MOV64Prmy:
- case SystemZ::MOV128rm:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
- MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SystemZ::MOV32mr:
- case SystemZ::MOV32mry:
- case SystemZ::MOV64mr:
- case SystemZ::MOV32m8r:
- case SystemZ::MOV32m8ry:
- case SystemZ::MOV32m16r:
- case SystemZ::MOV32m16ry:
- case SystemZ::MOV64m8r:
- case SystemZ::MOV64m8ry:
- case SystemZ::MOV64m16r:
- case SystemZ::MOV64m16ry:
- case SystemZ::MOV64m32r:
- case SystemZ::MOV64m32ry:
- case SystemZ::FMOV32mr:
- case SystemZ::FMOV32mry:
- case SystemZ::FMOV64mr:
- case SystemZ::FMOV64mry:
- case SystemZ::MOV64Pmr:
- case SystemZ::MOV64Pmry:
- case SystemZ::MOV128mr:
- if (MI->getOperand(0).isFI() &&
- MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
- return MI->getOperand(3).getReg();
- }
- break;
- }
- return 0;
-}
-
-bool SystemZInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-  assert(Cond.size() == 1 && "Invalid branch condition!");
-
- SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
- Cond[0].setImm(getOppositeCondition(CC));
- return false;
-}
-
-bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // Start from the bottom of the block and work up, examining the
- // terminator instructions.
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
- if (!isUnpredicatedTerminator(I))
- break;
-
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
- if (!I->getDesc().isBranch())
- return true;
-
- // Handle unconditional branches.
- if (I->getOpcode() == SystemZ::JMP) {
- if (!AllowModify) {
- TBB = I->getOperand(0).getMBB();
- continue;
- }
-
- // If the block has any instructions after a JMP, delete them.
- while (llvm::next(I) != MBB.end())
- llvm::next(I)->eraseFromParent();
- Cond.clear();
- FBB = 0;
-
- // Delete the JMP if it's equivalent to a fall-through.
- if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
- I->eraseFromParent();
- I = MBB.end();
- continue;
- }
-
-      // TBB is used to indicate the unconditional destination.
- TBB = I->getOperand(0).getMBB();
- continue;
- }
-
- // Handle conditional branches.
- SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
- if (BranchCode == SystemZCC::INVALID)
- return true; // Can't handle indirect branch.
-
- // Working from the bottom, handle the first conditional branch.
- if (Cond.empty()) {
- FBB = TBB;
- TBB = I->getOperand(0).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
- continue;
- }
-
- // Handle subsequent conditional branches. Only handle the case where all
- // conditional branches branch to the same destination.
- assert(Cond.size() == 1);
- assert(TBB);
-
- // Only handle the case where all conditional branches branch to
- // the same destination.
- if (TBB != I->getOperand(0).getMBB())
- return true;
-
- SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
- // If the conditions are the same, we can leave them alone.
- if (OldBranchCode == BranchCode)
- continue;
-
- return true;
- }
-
- return false;
-}
-
-unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- unsigned Count = 0;
-
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- if (I->getOpcode() != SystemZ::JMP &&
- getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
- break;
- // Remove the branch.
- I->eraseFromParent();
- I = MBB.end();
- ++Count;
- }
-
- return Count;
-}
-
-unsigned
-SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 1 || Cond.size() == 0) &&
-         "SystemZ branch conditions have at most one component!");
-
- if (Cond.empty()) {
- // Unconditional branch?
- assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);
- return 1;
- }
-
- // Conditional branch.
- unsigned Count = 0;
- SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
- BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
- ++Count;
-
- if (FBB) {
- // Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);
- ++Count;
- }
- return Count;
-}
-
-const MCInstrDesc&
-SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
- switch (CC) {
- default:
- llvm_unreachable("Unknown condition code!");
- case SystemZCC::O: return get(SystemZ::JO);
- case SystemZCC::H: return get(SystemZ::JH);
- case SystemZCC::NLE: return get(SystemZ::JNLE);
- case SystemZCC::L: return get(SystemZ::JL);
- case SystemZCC::NHE: return get(SystemZ::JNHE);
- case SystemZCC::LH: return get(SystemZ::JLH);
- case SystemZCC::NE: return get(SystemZ::JNE);
- case SystemZCC::E: return get(SystemZ::JE);
- case SystemZCC::NLH: return get(SystemZ::JNLH);
- case SystemZCC::HE: return get(SystemZ::JHE);
- case SystemZCC::NL: return get(SystemZ::JNL);
- case SystemZCC::LE: return get(SystemZ::JLE);
- case SystemZCC::NH: return get(SystemZ::JNH);
- case SystemZCC::NO: return get(SystemZ::JNO);
- }
-}
-
-SystemZCC::CondCodes
-SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
- switch (Opc) {
- default: return SystemZCC::INVALID;
- case SystemZ::JO: return SystemZCC::O;
- case SystemZ::JH: return SystemZCC::H;
- case SystemZ::JNLE: return SystemZCC::NLE;
- case SystemZ::JL: return SystemZCC::L;
- case SystemZ::JNHE: return SystemZCC::NHE;
- case SystemZ::JLH: return SystemZCC::LH;
- case SystemZ::JNE: return SystemZCC::NE;
- case SystemZ::JE: return SystemZCC::E;
- case SystemZ::JNLH: return SystemZCC::NLH;
- case SystemZ::JHE: return SystemZCC::HE;
- case SystemZ::JNL: return SystemZCC::NL;
- case SystemZ::JLE: return SystemZCC::LE;
- case SystemZ::JNH: return SystemZCC::NH;
- case SystemZ::JNO: return SystemZCC::NO;
- }
-}
-
-SystemZCC::CondCodes
-SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
- switch (CC) {
- default:
- llvm_unreachable("Invalid condition!");
- case SystemZCC::O: return SystemZCC::NO;
- case SystemZCC::H: return SystemZCC::NH;
- case SystemZCC::NLE: return SystemZCC::LE;
- case SystemZCC::L: return SystemZCC::NL;
- case SystemZCC::NHE: return SystemZCC::HE;
- case SystemZCC::LH: return SystemZCC::NLH;
- case SystemZCC::NE: return SystemZCC::E;
- case SystemZCC::E: return SystemZCC::NE;
- case SystemZCC::NLH: return SystemZCC::LH;
- case SystemZCC::HE: return SystemZCC::NHE;
- case SystemZCC::NL: return SystemZCC::L;
- case SystemZCC::LE: return SystemZCC::NLE;
- case SystemZCC::NH: return SystemZCC::H;
- case SystemZCC::NO: return SystemZCC::O;
- }
-}
-
-const MCInstrDesc&
-SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
- switch (Opc) {
- default:
- llvm_unreachable("Don't have long disp version of this instruction");
- case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
- case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
- case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
- case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
- case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
- case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
- case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
- case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
- case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
- case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
- case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
- case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
- case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
- case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
- case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
- case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
- case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
- case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
- }
-}
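The branch hooks above follow the usual TargetInstrInfo contract. A minimal
sketch of how a generic caller (branch folding, tail duplication, and similar
passes) drives them, assuming MBB is a MachineBasicBlock& and TII points at
this SystemZInstrInfo:

MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 1> Cond;
if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) &&
    FBB && Cond.size() == 1) {
  // Rewrite "jCC TBB; j FBB" as "j!CC FBB; j TBB" without changing behavior.
  TII->RemoveBranch(MBB);
  TII->ReverseBranchCondition(Cond);       // flips the SystemZCC in Cond[0]
  TII->InsertBranch(MBB, FBB, TBB, Cond, DebugLoc());
}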
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
deleted file mode 100644
index 6a31e9496365..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
-#define LLVM_TARGET_SYSTEMZINSTRINFO_H
-
-#include "SystemZ.h"
-#include "SystemZRegisterInfo.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "SystemZGenInstrInfo.inc"
-
-namespace llvm {
-
-class SystemZTargetMachine;
-
-/// SystemZII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace SystemZII {
- enum {
- //===------------------------------------------------------------------===//
- // SystemZ Specific MachineOperand flags.
-
- MO_NO_FLAG = 0,
-
- /// MO_GOTENT - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
- ///
- /// SYMBOL_LABEL @GOTENT
- MO_GOTENT = 1,
-
-    /// MO_PLT - On a symbol operand this indicates that the immediate is the
-    /// offset to the PLT entry of the symbol name from the current code
-    /// location.
- ///
- /// SYMBOL_LABEL @PLT
- MO_PLT = 2
- };
-}
-
-class SystemZInstrInfo : public SystemZGenInstrInfo {
- const SystemZRegisterInfo RI;
- SystemZTargetMachine &TM;
-public:
- explicit SystemZInstrInfo(SystemZTargetMachine &TM);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
- SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
- const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
- const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
-
- const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
- if (Offset < 0 || Offset >= 4096)
- return getLongDispOpc(Opc);
- else
- return get(Opc);
- }
-};
-
-}
-
-#endif
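getMemoryInstr above encodes the SystemZ displacement rule: the short forms
take a 12-bit unsigned displacement, anything else needs the long-displacement
variant returned by getLongDispOpc. A small sketch with hypothetical offsets,
assuming TII is a const SystemZInstrInfo& in scope:

const MCInstrDesc &Short = TII.getMemoryInstr(SystemZ::MOV32rm, 100);
// 100 fits in the 12-bit unsigned displacement -> stays MOV32rm (RX form).
const MCInstrDesc &Long  = TII.getMemoryInstr(SystemZ::MOV32rm, 8000);
// 8000 (or any negative offset) does not fit -> MOV32rmy (RXY form, 20-bit
// signed displacement).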
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
deleted file mode 100644
index 580d65b27f8a..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ /dev/null
@@ -1,1147 +0,0 @@
-//===- SystemZInstrInfo.td - SystemZ Instruction defs ---------*- tblgen-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the SystemZ instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SystemZ Instruction Predicate Definitions.
-def IsZ10 : Predicate<"Subtarget.isZ10()">;
-
-include "SystemZInstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// Type Constraints.
-//===----------------------------------------------------------------------===//
-class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
-class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
-class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
-class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
-
-//===----------------------------------------------------------------------===//
-// Type Profiles.
-//===----------------------------------------------------------------------===//
-def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
-def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
-def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
- SDTCisSameAs<1, 2>]>;
-def SDT_BrCond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>,
- SDTCisI8<1>, SDTCisVT<2, i64>]>;
-def SDT_SelectCC : SDTypeProfile<1, 4,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisI8<3>, SDTCisVT<4, i64>]>;
-def SDT_Address : SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-
-//===----------------------------------------------------------------------===//
-// SystemZ Specific Node Definitions.
-//===----------------------------------------------------------------------===//
-def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
-def SystemZcallseq_start :
- SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def SystemZcallseq_end :
- SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
-def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
-def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
- [SDNPHasChain]>;
-def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
-def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
-
-
-include "SystemZOperands.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction list.
-
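-// ADJCALLSTACKDOWN/UP are pseudo instructions that bracket the outgoing
-// argument area of a call; they have no encoding and are removed again by
-// SystemZRegisterInfo::eliminateCallFramePseudoInstr.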
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(SystemZcallseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
- "#ADJCALLSTACKUP",
- [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
-
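-// Select32/Select64 have no machine encoding; usesCustomInserter means they
-// are expanded after instruction selection by the target's custom inserter
-// hook (EmitInstrWithCustomInserter).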
-let Uses = [PSW], usesCustomInserter = 1 in {
- def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
- "# Select32 PSEUDO",
- [(set GR32:$dst,
- (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
- def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
- "# Select64 PSEUDO",
- [(set GR64:$dst,
- (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions...
-//
-
-// FIXME: Provide proper encoding!
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
- def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- let isBarrier = 1 in {
- def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
-
- let isIndirectBranch = 1 in
- def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
- }
-
- let Uses = [PSW] in {
- def JO : Pseudo<(outs), (ins brtarget:$dst),
- "jo\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
- def JH : Pseudo<(outs), (ins brtarget:$dst),
- "jh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
- def JNLE: Pseudo<(outs), (ins brtarget:$dst),
- "jnle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
- def JL : Pseudo<(outs), (ins brtarget:$dst),
- "jl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
- def JNHE: Pseudo<(outs), (ins brtarget:$dst),
- "jnhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
- def JLH : Pseudo<(outs), (ins brtarget:$dst),
- "jlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
- def JNE : Pseudo<(outs), (ins brtarget:$dst),
- "jne\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
- def JE : Pseudo<(outs), (ins brtarget:$dst),
- "je\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
- def JNLH: Pseudo<(outs), (ins brtarget:$dst),
- "jnlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
- def JHE : Pseudo<(outs), (ins brtarget:$dst),
- "jhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
- def JNL : Pseudo<(outs), (ins brtarget:$dst),
- "jnl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
- def JLE : Pseudo<(outs), (ins brtarget:$dst),
- "jle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
- def JNH : Pseudo<(outs), (ins brtarget:$dst),
- "jnh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
- def JNO : Pseudo<(outs), (ins brtarget:$dst),
- "jno\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
- } // Uses = [PSW]
-} // isBranch = 1
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. Uses for argument
- // registers are added manually.
- let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
- def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
- "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
- def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
- "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
- }
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions.
-//
-
-let isReMaterializable = 1 in
-// FIXME: Provide imm12 variant
-// FIXME: Address should be halfword aligned...
-def LA64r : RXI<0x47,
- (outs GR64:$dst), (ins laaddr:$src),
- "lay\t{$dst, $src}",
- [(set GR64:$dst, laaddr:$src)]>;
-def LA64rm : RXYI<0x71E3,
- (outs GR64:$dst), (ins i64imm:$src),
- "larl\t{$dst, $src}",
- [(set GR64:$dst,
- (SystemZpcrelwrapper tglobaladdr:$src))]>;
-
-let neverHasSideEffects = 1 in
-def NOP : Pseudo<(outs), (ins), "# no-op", []>;
-
-//===----------------------------------------------------------------------===//
-// Move Instructions
-
-let neverHasSideEffects = 1 in {
-def MOV32rr : RRI<0x18,
- (outs GR32:$dst), (ins GR32:$src),
- "lr\t{$dst, $src}",
- []>;
-def MOV64rr : RREI<0xB904,
- (outs GR64:$dst), (ins GR64:$src),
- "lgr\t{$dst, $src}",
- []>;
-def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
- "# MOV128 PSEUDO!\n"
- "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
- "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
- []>;
-def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
- "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
- []>;
-}
-
-def MOVSX64rr32 : RREI<0xB914,
- (outs GR64:$dst), (ins GR32:$src),
- "lgfr\t{$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVZX64rr32 : RREI<0xB916,
- (outs GR64:$dst), (ins GR32:$src),
- "llgfr\t{$dst, $src}",
- [(set GR64:$dst, (zext GR32:$src))]>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV32ri16 : RII<0x8A7,
- (outs GR32:$dst), (ins s16imm:$src),
- "lhi\t{$dst, $src}",
- [(set GR32:$dst, immSExt16:$src)]>;
-def MOV64ri16 : RII<0x9A7,
- (outs GR64:$dst), (ins s16imm64:$src),
- "lghi\t{$dst, $src}",
- [(set GR64:$dst, immSExt16:$src)]>;
-
-def MOV64rill16 : RII<0xFA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llill\t{$dst, $src}",
- [(set GR64:$dst, i64ll16:$src)]>;
-def MOV64rilh16 : RII<0xEA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llilh\t{$dst, $src}",
- [(set GR64:$dst, i64lh16:$src)]>;
-def MOV64rihl16 : RII<0xDA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llihl\t{$dst, $src}",
- [(set GR64:$dst, i64hl16:$src)]>;
-def MOV64rihh16 : RII<0xCA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llihh\t{$dst, $src}",
- [(set GR64:$dst, i64hh16:$src)]>;
-
-def MOV64ri32 : RILI<0x1C0,
- (outs GR64:$dst), (ins s32imm64:$src),
- "lgfi\t{$dst, $src}",
- [(set GR64:$dst, immSExt32:$src)]>;
-def MOV64rilo32 : RILI<0xFC0,
- (outs GR64:$dst), (ins u32imm:$src),
- "llilf\t{$dst, $src}",
- [(set GR64:$dst, i64lo32:$src)]>;
-def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
- "llihf\t{$dst, $src}",
- [(set GR64:$dst, i64hi32:$src)]>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOV32rm : RXI<0x58,
- (outs GR32:$dst), (ins rriaddr12:$src),
- "l\t{$dst, $src}",
- [(set GR32:$dst, (load rriaddr12:$src))]>;
-def MOV32rmy : RXYI<0x58E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "ly\t{$dst, $src}",
- [(set GR32:$dst, (load rriaddr:$src))]>;
-def MOV64rm : RXYI<0x04E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lg\t{$dst, $src}",
- [(set GR64:$dst, (load rriaddr:$src))]>;
-def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
- "# MOV64P PSEUDO!\n"
- "\tl\t${dst:subreg_odd}, $src\n"
- "\tl\t${dst:subreg_even}, 4+$src",
- [(set GR64P:$dst, (load rriaddr12:$src))]>;
-def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
- "# MOV64P PSEUDO!\n"
- "\tly\t${dst:subreg_odd}, $src\n"
- "\tly\t${dst:subreg_even}, 4+$src",
- [(set GR64P:$dst, (load rriaddr:$src))]>;
-def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
- "# MOV128 PSEUDO!\n"
- "\tlg\t${dst:subreg_odd}, $src\n"
- "\tlg\t${dst:subreg_even}, 8+$src",
- [(set GR128:$dst, (load rriaddr:$src))]>;
-}
-
-def MOV32mr : RXI<0x50,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "st\t{$src, $dst}",
- [(store GR32:$src, rriaddr12:$dst)]>;
-def MOV32mry : RXYI<0x50E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "sty\t{$src, $dst}",
- [(store GR32:$src, rriaddr:$dst)]>;
-def MOV64mr : RXYI<0x24E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "stg\t{$src, $dst}",
- [(store GR64:$src, rriaddr:$dst)]>;
-def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tst\t${src:subreg_odd}, $dst\n"
- "\tst\t${src:subreg_even}, 4+$dst",
- [(store GR64P:$src, rriaddr12:$dst)]>;
-def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tsty\t${src:subreg_odd}, $dst\n"
- "\tsty\t${src:subreg_even}, 4+$dst",
- [(store GR64P:$src, rriaddr:$dst)]>;
-def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
- "# MOV128 PSEUDO!\n"
- "\tstg\t${src:subreg_odd}, $dst\n"
- "\tstg\t${src:subreg_even}, 8+$dst",
- [(store GR128:$src, rriaddr:$dst)]>;
-
-def MOV8mi : SII<0x92,
- (outs), (ins riaddr12:$dst, i32i8imm:$src),
- "mvi\t{$dst, $src}",
- [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
-def MOV8miy : SIYI<0x52EB,
- (outs), (ins riaddr:$dst, i32i8imm:$src),
- "mviy\t{$dst, $src}",
- [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
-
-let AddedComplexity = 2 in {
-def MOV16mi : SILI<0xE544,
- (outs), (ins riaddr12:$dst, s16imm:$src),
- "mvhhi\t{$dst, $src}",
- [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-def MOV32mi16 : SILI<0xE54C,
- (outs), (ins riaddr12:$dst, s32imm:$src),
- "mvhi\t{$dst, $src}",
- [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-def MOV64mi16 : SILI<0xE548,
- (outs), (ins riaddr12:$dst, s32imm64:$src),
- "mvghi\t{$dst, $src}",
- [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-}
-
-// sexts
-def MOVSX32rr8 : RREI<0xB926,
- (outs GR32:$dst), (ins GR32:$src),
- "lbr\t{$dst, $src}",
- [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
-def MOVSX64rr8 : RREI<0xB906,
- (outs GR64:$dst), (ins GR64:$src),
- "lgbr\t{$dst, $src}",
- [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
-def MOVSX32rr16 : RREI<0xB927,
- (outs GR32:$dst), (ins GR32:$src),
- "lhr\t{$dst, $src}",
- [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
-def MOVSX64rr16 : RREI<0xB907,
- (outs GR64:$dst), (ins GR64:$src),
- "lghr\t{$dst, $src}",
- [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
-
-// extloads
-def MOVSX32rm8 : RXYI<0x76E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "lb\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
-def MOVSX32rm16 : RXI<0x48,
- (outs GR32:$dst), (ins rriaddr12:$src),
- "lh\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
-def MOVSX32rm16y : RXYI<0x78E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "lhy\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
-def MOVSX64rm8 : RXYI<0x77E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgb\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
-def MOVSX64rm16 : RXYI<0x15E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgh\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
-def MOVSX64rm32 : RXYI<0x14E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgf\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
-
-def MOVZX32rm8 : RXYI<0x94E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "llc\t{$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
-def MOVZX32rm16 : RXYI<0x95E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "llh\t{$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
-def MOVZX64rm8 : RXYI<0x90E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgc\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
-def MOVZX64rm16 : RXYI<0x91E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgh\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
-def MOVZX64rm32 : RXYI<0x16E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgf\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
-
-// truncstores
-def MOV32m8r : RXI<0x42,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "stc\t{$src, $dst}",
- [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
-
-def MOV32m8ry : RXYI<0x72E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "stcy\t{$src, $dst}",
- [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
-
-def MOV32m16r : RXI<0x40,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "sth\t{$src, $dst}",
- [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
-
-def MOV32m16ry : RXYI<0x70E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "sthy\t{$src, $dst}",
- [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
-
-def MOV64m8r : RXI<0x42,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "stc\t{$src, $dst}",
- [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m8ry : RXYI<0x72E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "stcy\t{$src, $dst}",
- [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
-
-def MOV64m16r : RXI<0x40,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "sth\t{$src, $dst}",
- [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m16ry : RXYI<0x70E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "sthy\t{$src, $dst}",
- [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
-
-def MOV64m32r : RXI<0x50,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "st\t{$src, $dst}",
- [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m32ry : RXYI<0x50E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "sty\t{$src, $dst}",
- [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
-
-// multiple regs moves
-// FIXME: should we use multiple arg nodes?
-def MOV32mrm : RSYI<0x90EB,
- (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
- "stmy\t{$from, $to, $dst}",
- []>;
-def MOV64mrm : RSYI<0x24EB,
- (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
- "stmg\t{$from, $to, $dst}",
- []>;
-def MOV32rmm : RSYI<0x90EB,
- (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
- "lmy\t{$from, $to, $dst}",
- []>;
-def MOV64rmm : RSYI<0x04EB,
- (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
- "lmg\t{$from, $to, $dst}",
- []>;
-
-let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1,
- Constraints = "$src = $dst" in {
-def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
- "lhi\t${dst:subreg_even}, 0",
- []>;
-def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
- "lghi\t${dst:subreg_even}, 0",
- []>;
-}
-
-// Byte swaps
-def BSWAP32rr : RREI<0xB91F,
- (outs GR32:$dst), (ins GR32:$src),
- "lrvr\t{$dst, $src}",
- [(set GR32:$dst, (bswap GR32:$src))]>;
-def BSWAP64rr : RREI<0xB90F,
- (outs GR64:$dst), (ins GR64:$src),
- "lrvgr\t{$dst, $src}",
- [(set GR64:$dst, (bswap GR64:$src))]>;
-
-// FIXME: this is invalid pattern for big-endian
-//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
-// "lrvh\t{$dst, $src}",
-// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
-def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
- "lrv\t{$dst, $src}",
- [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
-def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
- "lrvg\t{$dst, $src}",
- [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
-
-//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
-// "strvh\t{$src, $dst}",
-// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
-def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
- "strv\t{$src, $dst}",
- [(store (bswap GR32:$src), rriaddr:$dst)]>;
-def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
- "strvg\t{$src, $dst}",
- [(store (bswap GR64:$src), rriaddr:$dst)]>;
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions
-
-let Defs = [PSW] in {
-def NEG32rr : RRI<0x13,
- (outs GR32:$dst), (ins GR32:$src),
- "lcr\t{$dst, $src}",
- [(set GR32:$dst, (ineg GR32:$src)),
- (implicit PSW)]>;
-def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
- "lcgr\t{$dst, $src}",
- [(set GR64:$dst, (ineg GR64:$src)),
- (implicit PSW)]>;
-def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
- "lcgfr\t{$dst, $src}",
- [(set GR64:$dst, (ineg (sext GR32:$src))),
- (implicit PSW)]>;
-}
-
-let Constraints = "$src1 = $dst" in {
-
-let Defs = [PSW] in {
-
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "ar\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "agr\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "a\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "ay\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "ag\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-
-def ADD32ri16 : RII<0xA7A,
- (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
- "ahi\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
- (implicit PSW)]>;
-def ADD32ri : RILI<0xC29,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "afi\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
- (implicit PSW)]>;
-def ADD64ri16 : RILI<0xA7B,
- (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
- "aghi\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
- (implicit PSW)]>;
-def ADD64ri32 : RILI<0xC28,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "agfi\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
- (implicit PSW)]>;
-
-let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
-def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "alr\t{$dst, $src2}",
- [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
-def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "algr\t{$dst, $src2}",
- [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
-}
-
-def ADC32ri : RILI<0xC2B,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "alfi\t{$dst, $src2}",
- [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
-def ADC64ri32 : RILI<0xC2A,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "algfi\t{$dst, $src2}",
- [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
-
-let Uses = [PSW] in {
-def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "alcr\t{$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "alcgr\t{$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
-def AND32rr : RRI<0x14,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "nr\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
-def AND64rr : RREI<0xB980,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "ngr\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
-}
-
-def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "n\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "ny\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "ng\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def AND32rill16 : RII<0xA57,
- (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
- "nill\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
-def AND64rill16 : RII<0xA57,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nill\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
-
-def AND32rilh16 : RII<0xA56,
- (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
- "nilh\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
-def AND64rilh16 : RII<0xA56,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nilh\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
-
-def AND64rihl16 : RII<0xA55,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nihl\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
-def AND64rihh16 : RII<0xA54,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nihh\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
-
-def AND32ri : RILI<0xC0B,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "nilf\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
-def AND64rilo32 : RILI<0xC0B,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "nilf\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
-def AND64rihi32 : RILI<0xC0A,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "nihf\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
-
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR32rr : RRI<0x16,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "or\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
-def OR64rr : RREI<0xB981,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "ogr\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
-}
-
-def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "o\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "oy\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "og\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
- // FIXME: Provide proper encoding!
-def OR32ri16 : RII<0xA5B,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oill\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
-def OR32ri16h : RII<0xA5A,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oilh\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
-def OR32ri : RILI<0xC0D,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oilf\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
-
-def OR64rill16 : RII<0xA5B,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oill\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
-def OR64rilh16 : RII<0xA5A,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oilh\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
-def OR64rihl16 : RII<0xA59,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oihl\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
-def OR64rihh16 : RII<0xA58,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oihh\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
-
-def OR64rilo32 : RILI<0xC0D,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "oilf\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
-def OR64rihi32 : RILI<0xC0C,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "oihf\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
-
-def SUB32rr : RRI<0x1B,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "sr\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
-def SUB64rr : RREI<0xB909,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "sgr\t{$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
-
-def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "s\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "sy\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "sg\t{$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def SBC32rr : RRI<0x1F,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "slr\t{$dst, $src2}",
- [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
-def SBC64rr : RREI<0xB90B,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "slgr\t{$dst, $src2}",
- [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
-
-def SBC32ri : RILI<0xC25,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
-                     "slfi\t{$dst, $src2}",
- [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
-def SBC64ri32 : RILI<0xC24,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "slgfi\t{$dst, $src2}",
- [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
-
-let Uses = [PSW] in {
-def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "slbr\t{$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "slbgr\t{$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
-def XOR32rr : RRI<0x17,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xr\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
-def XOR64rr : RREI<0xB982,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "xgr\t{$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
-}
-
-def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "x\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "xy\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "xg\t{$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def XOR32ri : RILI<0xC07,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "xilf\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
-
-} // Defs = [PSW]
-
-let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
-def MUL32rr : RREI<0xB252,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "msr\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
-def MUL64rr : RREI<0xB90C,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "msgr\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
-}
-
-def MUL64rrP : RRI<0x1C,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "mr\t{$dst, $src2}",
- []>;
-def UMUL64rrP : RREI<0xB996,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "mlr\t{$dst, $src2}",
- []>;
-def UMUL128rrP : RREI<0xB986,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "mlgr\t{$dst, $src2}",
- []>;
-
-def MUL32ri16 : RII<0xA7C,
- (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
- "mhi\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
-def MUL64ri16 : RII<0xA7D,
- (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
- "mghi\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
-
-let AddedComplexity = 2 in {
-def MUL32ri : RILI<0xC21,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "msfi\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
- Requires<[IsZ10]>;
-def MUL64ri32 : RILI<0xC20,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "msgfi\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
- Requires<[IsZ10]>;
-}
-
-def MUL32rm : RXI<0x71,
- (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "ms\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
-def MUL32rmy : RXYI<0xE351,
- (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "msy\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
-def MUL64rm : RXYI<0xE30C,
- (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "msg\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
-
-def MULSX64rr32 : RREI<0xB91C,
- (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
- "msgfr\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
-
-def SDIVREM32r : RREI<0xB91D,
- (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
- "dsgfr\t{$dst, $src2}",
- []>;
-def SDIVREM64r : RREI<0xB90D,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "dsgr\t{$dst, $src2}",
- []>;
-
-def UDIVREM32r : RREI<0xB997,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "dlr\t{$dst, $src2}",
- []>;
-def UDIVREM64r : RREI<0xB987,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "dlgr\t{$dst, $src2}",
- []>;
-let mayLoad = 1 in {
-def SDIVREM32m : RXYI<0xE31D,
- (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dsgf\t{$dst, $src2}",
- []>;
-def SDIVREM64m : RXYI<0xE30D,
- (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dsg\t{$dst, $src2}",
- []>;
-
-def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
- "dl\t{$dst, $src2}",
- []>;
-def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dlg\t{$dst, $src2}",
- []>;
-} // mayLoad
-} // Constraints = "$src1 = $dst"
-
-//===----------------------------------------------------------------------===//
-// Shifts
-
-let Constraints = "$src = $dst" in
-def SRL32rri : RSI<0x88,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "srl\t{$src, $amt}",
- [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
-def SRL64rri : RSYI<0xEB0C,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "srlg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
-
-let Constraints = "$src = $dst" in
-def SHL32rri : RSI<0x89,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "sll\t{$src, $amt}",
- [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
-def SHL64rri : RSYI<0xEB0D,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "sllg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
-
-let Defs = [PSW] in {
-let Constraints = "$src = $dst" in
-def SRA32rri : RSI<0x8A,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "sra\t{$src, $amt}",
- [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
- (implicit PSW)]>;
-
-def SRA64rri : RSYI<0xEB0A,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "srag\t{$dst, $src, $amt}",
- [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-def ROTL32rri : RSYI<0xEB1D,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "rll\t{$dst, $src, $amt}",
- [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
-def ROTL64rri : RSYI<0xEB1C,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "rllg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
-
-//===----------------------------------------------------------------------===//
-// Test instructions (like AND but do not produce any result)
-
-// Integer comparisons
-let Defs = [PSW] in {
-def CMP32rr : RRI<0x19,
- (outs), (ins GR32:$src1, GR32:$src2),
- "cr\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
-def CMP64rr : RREI<0xB920,
- (outs), (ins GR64:$src1, GR64:$src2),
- "cgr\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
-
-def CMP32ri : RILI<0xC2D,
- (outs), (ins GR32:$src1, s32imm:$src2),
- "cfi\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
-def CMP64ri32 : RILI<0xC2C,
- (outs), (ins GR64:$src1, s32imm64:$src2),
- "cgfi\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
-
-def CMP32rm : RXI<0x59,
- (outs), (ins GR32:$src1, rriaddr12:$src2),
- "c\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
-def CMP32rmy : RXYI<0xE359,
- (outs), (ins GR32:$src1, rriaddr:$src2),
- "cy\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
-def CMP64rm : RXYI<0xE320,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "cg\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
-
-def UCMP32rr : RRI<0x15,
- (outs), (ins GR32:$src1, GR32:$src2),
- "clr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
-def UCMP64rr : RREI<0xB921,
- (outs), (ins GR64:$src1, GR64:$src2),
- "clgr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
-
-def UCMP32ri : RILI<0xC2F,
- (outs), (ins GR32:$src1, i32imm:$src2),
- "clfi\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
-def UCMP64ri32 : RILI<0xC2E,
- (outs), (ins GR64:$src1, i64i32imm:$src2),
- "clgfi\t$src1, $src2",
- [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
-
-def UCMP32rm : RXI<0x55,
- (outs), (ins GR32:$src1, rriaddr12:$src2),
- "cl\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1,
- (load rriaddr12:$src2)))]>;
-def UCMP32rmy : RXYI<0xE355,
- (outs), (ins GR32:$src1, rriaddr:$src2),
- "cly\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1,
- (load rriaddr:$src2)))]>;
-def UCMP64rm : RXYI<0xE351,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "clg\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (load rriaddr:$src2)))]>;
-
-def CMPSX64rr32 : RREI<0xB930,
- (outs), (ins GR64:$src1, GR32:$src2),
- "cgfr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (sext GR32:$src2)))]>;
-def UCMPZX64rr32 : RREI<0xB931,
- (outs), (ins GR64:$src1, GR32:$src2),
- "clgfr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (zext GR32:$src2)))]>;
-
-def CMPSX64rm32 : RXYI<0xE330,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "cgf\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (sextloadi64i32 rriaddr:$src2)))]>;
-def UCMPZX64rm32 : RXYI<0xE331,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "clgf\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (zextloadi64i32 rriaddr:$src2)))]>;
-
-// FIXME: Add other crazy ucmp forms
-
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Other crazy stuff
-let Defs = [PSW] in {
-def FLOGR64 : RREI<0xB983,
- (outs GR128:$dst), (ins GR64:$src),
- "flogr\t{$dst, $src}",
- []>;
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns.
-//===----------------------------------------------------------------------===//
-
-// ConstPools, JumpTables
-def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
-def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
-
-// anyext
-def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
-
-// calls
-def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
-def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
-
-//===----------------------------------------------------------------------===//
-// Peepholes.
-//===----------------------------------------------------------------------===//
-
-// FIXME: use add/sub tricks with 32768/-32768
-
-// Arbitrary immediate support.
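-// A 32-bit immediate is materialized by loading it as a sign-extended 64-bit
-// constant with lgfi (MOV64ri32) and extracting the low 32-bit subregister.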
-def : Pat<(i32 imm:$src),
- (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))),
- subreg_32bit)>;
-
-// Implement in terms of LLIHF/OILF.
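-// llihf (MOV64rihi32) loads the upper 32 bits, then oilf (OR64rilo32) ORs in
-// the lower 32 bits.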
-def : Pat<(i64 imm:$imm),
- (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// extload patterns
-def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
-def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
-def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
-def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
-def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
-
-// muls
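-// mulhs/mulhu have no direct instruction; they are lowered to the widening
-// multiplies mr/mlr/mlgr, which produce their result in a register pair, and
-// the high half of the product is then extracted.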
-def : Pat<(mulhs GR32:$src1, GR32:$src2),
- (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- GR32:$src1, subreg_odd32),
- GR32:$src2),
- subreg_32bit)>;
-
-def : Pat<(mulhu GR32:$src1, GR32:$src2),
- (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- GR32:$src1, subreg_odd32),
- GR32:$src2),
- subreg_32bit)>;
-def : Pat<(mulhu GR64:$src1, GR64:$src2),
- (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- GR64:$src1, subreg_odd),
- GR64:$src2),
- subreg_even)>;
-
-def : Pat<(ctlz GR64:$src),
- (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
deleted file mode 100644
index fd6e330344b6..000000000000
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares SystemZ-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
-#define SYSTEMZMACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
-/// and contains private SystemZ target-specific information for each
-/// MachineFunction.
-class SystemZMachineFunctionInfo : public MachineFunctionInfo {
- /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
- /// stack frame in bytes.
- unsigned CalleeSavedFrameSize;
-
- /// LowReg - Low register of range of callee-saved registers to store.
- unsigned LowReg;
-
- /// HighReg - High register of range of callee-saved registers to store.
- unsigned HighReg;
-public:
- SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
-
- explicit SystemZMachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0) {}
-
- unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
- void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
-
- unsigned getLowReg() const { return LowReg; }
- void setLowReg(unsigned Reg) { LowReg = Reg; }
-
- unsigned getHighReg() const { return HighReg; }
- void setHighReg(unsigned Reg) { HighReg = Reg; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
deleted file mode 100644
index 8b835cc26e29..000000000000
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ /dev/null
@@ -1,325 +0,0 @@
-//=====- SystemZOperands.td - SystemZ Operands defs -------*- tablegen -*-=====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the various SystemZ instruction operands.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction Pattern Stuff.
-//===----------------------------------------------------------------------===//
-
-// SystemZ-specific condition codes. These correspond to the CondCodes values
-// in SystemZ.h and must be kept in sync with them.
-def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
-def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
-def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
-def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
-def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
-def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
-def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
-def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
-def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
-def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
-def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
-def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
-def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
-def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
-
-def LO8 : SDNodeXForm<imm, [{
- // Transformation function: return low 8 bits.
- return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
-}]>;
-
-def LL16 : SDNodeXForm<imm, [{
- // Transformation function: return low 16 bits.
- return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
-}]>;
-
-def LH16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 16-31.
- return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
-}]>;
-
-def HL16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 32-47.
- return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
-}]>;
-
-def HH16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 48-63.
- return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
-}]>;
-
-def LO32 : SDNodeXForm<imm, [{
- // Transformation function: return low 32 bits.
- return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
-}]>;
-
-def HI32 : SDNodeXForm<imm, [{
- // Transformation function: return bits 32-63.
- return getI32Imm(N->getZExtValue() >> 32);
-}]>;
-
-def GetI64FromI32 : SDNodeXForm<imm, [{
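-  // Transformation function: sign-extend the 32-bit immediate to an i64
-  // target constant.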
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
-}]>;
-
-def i32ll16 : PatLeaf<(i32 imm), [{
- // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
- // bits set.
- return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
-}], LL16>;
-
-def i32lh16 : PatLeaf<(i32 imm), [{
- // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
- return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LH16>;
-
-def i32ll16c : PatLeaf<(i32 imm), [{
- // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
- return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LL16>;
-
-def i32lh16c : PatLeaf<(i32 imm), [{
- // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
- // bits set.
- return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
-}], LH16>;
-
-def i64ll16 : PatLeaf<(i64 imm), [{
- // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
- // bits set.
- return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
-}], LL16>;
-
-def i64lh16 : PatLeaf<(i64 imm), [{
- // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
- return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LH16>;
-
-def i64hl16 : PatLeaf<(i64 imm), [{
- // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
- return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
-}], HL16>;
-
-def i64hh16 : PatLeaf<(i64 imm), [{
- // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
- return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
-}], HH16>;
-
-def i64ll16c : PatLeaf<(i64 imm), [{
-  // i64ll16c predicate - true if the 64-bit immediate has all bits except
-  // bits 0-15 set.
-  return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
-}], LL16>;
-
-def i64lh16c : PatLeaf<(i64 imm), [{
-  // i64lh16c predicate - true if the 64-bit immediate has all bits except
-  // bits 16-31 set.
-  return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
-}], LH16>;
-
-def i64hl16c : PatLeaf<(i64 imm), [{
-  // i64hl16c predicate - true if the 64-bit immediate has all bits except
-  // bits 32-47 set.
-  return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
-}], HL16>;
-
-def i64hh16c : PatLeaf<(i64 imm), [{
-  // i64hh16c predicate - true if the 64-bit immediate has all bits except
-  // bits 48-63 set.
-  return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
-}], HH16>;
-
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
- // field.
- if (N->getValueType(0) == MVT::i64) {
- uint64_t val = N->getZExtValue();
- return ((int64_t)val == (int16_t)val);
- } else if (N->getValueType(0) == MVT::i32) {
- uint32_t val = N->getZExtValue();
- return ((int32_t)val == (int16_t)val);
- }
-
- return false;
-}], LL16>;
-
-def immSExt32 : PatLeaf<(i64 imm), [{
- // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
- // field.
- uint64_t val = N->getZExtValue();
- return ((int64_t)val == (int32_t)val);
-}], LO32>;
-
-def i64lo32 : PatLeaf<(i64 imm), [{
- // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
- // bits set.
- return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
-}], LO32>;
-
-def i64hi32 : PatLeaf<(i64 imm), [{
- // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
- return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
-}], HI32>;
-
-def i64lo32c : PatLeaf<(i64 imm), [{
-  // i64lo32c predicate - true if the 64-bit immediate has all bits 32-63 set.
-  return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
-}], LO32>;
-
-def i64hi32c : PatLeaf<(i64 imm), [{
-  // i64hi32c predicate - true if the 64-bit immediate has all bits 0-31 set.
-  return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
-}], HI32>;
-
-def i32immSExt8 : PatLeaf<(i32 imm), [{
-  // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
-  // sign extended field.
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
-}], LO8>;
-
-def i32immSExt16 : PatLeaf<(i32 imm), [{
- // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
- // sign extended field.
- return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
-}], LL16>;
-
-def i64immSExt32 : PatLeaf<(i64 imm), [{
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}], LO32>;
-
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // zero extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}], LO32>;
-
-// extloads
-def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
-def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
-def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
-def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
-def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
-def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
-def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-// A couple more descriptive operand definitions.
-// 32-bits but only 8 bits are significant.
-def i32i8imm : Operand<i32>;
-// 32-bits but only 16 bits are significant.
-def i32i16imm : Operand<i32>;
-// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64>;
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
-
-// Unsigned i12
-def u12imm : Operand<i32> {
- let PrintMethod = "printU12ImmOperand";
-}
-def u12imm64 : Operand<i64> {
- let PrintMethod = "printU12ImmOperand";
-}
-
-// Signed i16
-def s16imm : Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
-}
-def s16imm64 : Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
-}
-// Unsigned i16
-def u16imm : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
-}
-def u16imm64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-// Signed i20
-def s20imm : Operand<i32> {
- let PrintMethod = "printS20ImmOperand";
-}
-def s20imm64 : Operand<i64> {
- let PrintMethod = "printS20ImmOperand";
-}
-// Signed i32
-def s32imm : Operand<i32> {
- let PrintMethod = "printS32ImmOperand";
-}
-def s32imm64 : Operand<i64> {
- let PrintMethod = "printS32ImmOperand";
-}
-// Unsigned i32
-def u32imm : Operand<i32> {
- let PrintMethod = "printU32ImmOperand";
-}
-def u32imm64 : Operand<i64> {
- let PrintMethod = "printU32ImmOperand";
-}
-
-def imm_pcrel : Operand<i64> {
- let PrintMethod = "printPCRelImmOperand";
-}
-
-//===----------------------------------------------------------------------===//
-// SystemZ Operand Definitions.
-//===----------------------------------------------------------------------===//
-
-// Address operands
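-// Displacements come in two sizes: the 12-bit unsigned field of the classic
-// RX/RS/SI formats and the 20-bit signed field of the long-displacement
-// RXY/RSY/SIY formats (see the *12 vs. plain operand variants below).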
-
-// riaddr := reg + imm
-def riaddr32 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
-}
-
-def riaddr12 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI12", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
-}
-
-def riaddr : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
-}
-
-//===----------------------------------------------------------------------===//
-
-// rriaddr := reg + reg + imm
-def rriaddr12 : Operand<i64>,
- ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
-}
-def rriaddr : Operand<i64>,
- ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
-}
-def laaddr : Operand<i64>,
- ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
-}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
deleted file mode 100644
index b1050d46e550..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSubtarget.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "SystemZGenRegisterInfo.inc"
-
-using namespace llvm;
-
-SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
- const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(0), TM(tm), TII(tii) {
-}
-
-const unsigned*
-SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const unsigned CalleeSavedRegs[] = {
- SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
- SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
- SystemZ::R14D, SystemZ::R15D,
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
- 0
- };
-
- return CalleeSavedRegs;
-}
-
-BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (TFI->hasFP(MF)) {
- // R11D is the frame pointer. Reserve all aliases.
- Reserved.set(SystemZ::R11D);
- Reserved.set(SystemZ::R11W);
- Reserved.set(SystemZ::R10P);
- Reserved.set(SystemZ::R10Q);
- }
-
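-  // R14 (the return-address register) and R15 (the stack pointer) are always
-  // reserved, together with their 32-bit and register-pair aliases.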
- Reserved.set(SystemZ::R14D);
- Reserved.set(SystemZ::R15D);
- Reserved.set(SystemZ::R14W);
- Reserved.set(SystemZ::R15W);
- Reserved.set(SystemZ::R14P);
- Reserved.set(SystemZ::R14Q);
- return Reserved;
-}
-
-const TargetRegisterClass*
-SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned Idx) const {
- switch(Idx) {
- // Exact sub-classes don't exist for the other sub-register indexes.
- default: return 0;
- case SystemZ::subreg_32bit:
- if (B == SystemZ::ADDR32RegisterClass)
- return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0;
- return A;
- }
-}
-
-void SystemZRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MBB.erase(I);
-}
-
-void
-SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
-  assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
-
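-  // Address the slot via the frame pointer (R11) when the function has one,
-  // otherwise via the stack pointer (R15).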
- unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
-
-  // This must be part of an rri or ri operand memory reference. Replace the
-  // FrameIndex with the base register BasePtr and add the frame offset to the
-  // displacement field.
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
-
-  // Offset is either a 12-bit unsigned or a 20-bit signed integer.
- // FIXME: handle "too long" displacements.
- int Offset =
- TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
-
-  // Switch to the long-displacement form of the opcode if the offset does not
-  // fit into the 12-bit unsigned displacement field.
- MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
-
- MI.getOperand(i+1).ChangeToImmediate(Offset);
-}
-
-unsigned
-SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- assert(0 && "What is the frame register");
- return 0;
-}
-
-unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
- return 0;
-}
-
-unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
- return 0;
-}
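For context on the deleted eliminateFrameIndex above: the displacement that replaces a frame index must fit either a 12-bit unsigned or a 20-bit signed field, and getMemoryInstr picks the long form when the short one does not fit. A minimal standalone sketch of that range decision, not part of the patch; the helper names are invented for illustration.

#include <cstdint>
#include <cstdio>

// Hypothetical helpers mirroring the 12-bit unsigned / 20-bit signed split.
static bool fitsU12(int64_t Disp) { return Disp >= 0 && Disp < (1 << 12); }
static bool fitsS20(int64_t Disp) {
  return Disp >= -(1 << 19) && Disp < (1 << 19);
}

int main() {
  const int64_t Samples[] = {0, 4095, 4096, -8, 524287, 524288};
  for (int64_t D : Samples)
    std::printf("disp %6lld: short=%d long=%d\n", (long long)D,
                fitsU12(D), fitsS20(D));
  return 0;
}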
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
deleted file mode 100644
index 03935b2bec10..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SystemZREGISTERINFO_H
-#define SystemZREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "SystemZGenRegisterInfo.inc"
-
-namespace llvm {
-
-class SystemZSubtarget;
-class SystemZInstrInfo;
-class Type;
-
-struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
- SystemZTargetMachine &TM;
- const SystemZInstrInfo &TII;
-
- SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- const TargetRegisterClass*
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- // Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
deleted file mode 100644
index a24cbcf4ccd8..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ /dev/null
@@ -1,205 +0,0 @@
-//===- SystemZRegisterInfo.td - The SystemZ Register File ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-class SystemZReg<string n> : Register<n> {
- let Namespace = "SystemZ";
-}
-
-class SystemZRegWithSubregs<string n, list<Register> subregs>
- : RegisterWithSubRegs<n, subregs> {
- let Namespace = "SystemZ";
-}
-
-// We identify all our registers with a 4-bit ID, for consistency's sake.
-
-// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
-class GPR32<bits<4> num, string n> : SystemZReg<n> {
- field bits<4> Num = num;
-}
-
-// GPR64 - One of the 16 64-bit general-purpose registers
-class GPR64<bits<4> num, string n, list<Register> subregs,
- list<Register> aliases = []>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
- let Aliases = aliases;
-}
-
-// GPR128 - 8 even-odd register pairs
-class GPR128<bits<4> num, string n, list<Register> subregs,
- list<Register> aliases = []>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
- let Aliases = aliases;
-}
-
-// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
-class FPRS<bits<4> num, string n> : SystemZReg<n> {
- field bits<4> Num = num;
-}
-
-// FPRL - One of the 16 64-bit floating-point registers
-class FPRL<bits<4> num, string n, list<Register> subregs>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
-}
-
-let Namespace = "SystemZ" in {
-def subreg_32bit : SubRegIndex;
-def subreg_odd32 : SubRegIndex;
-def subreg_even : SubRegIndex;
-def subreg_odd : SubRegIndex;
-}
-
-// General-purpose registers
-def R0W : GPR32< 0, "r0">;
-def R1W : GPR32< 1, "r1">;
-def R2W : GPR32< 2, "r2">;
-def R3W : GPR32< 3, "r3">;
-def R4W : GPR32< 4, "r4">;
-def R5W : GPR32< 5, "r5">;
-def R6W : GPR32< 6, "r6">;
-def R7W : GPR32< 7, "r7">;
-def R8W : GPR32< 8, "r8">;
-def R9W : GPR32< 9, "r9">;
-def R10W : GPR32<10, "r10">;
-def R11W : GPR32<11, "r11">;
-def R12W : GPR32<12, "r12">;
-def R13W : GPR32<13, "r13">;
-def R14W : GPR32<14, "r14">;
-def R15W : GPR32<15, "r15">;
-
-let SubRegIndices = [subreg_32bit] in {
-def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
-def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
-def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
-def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
-def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
-def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
-def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
-def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
-def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
-def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
-def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
-def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
-def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
-def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
-def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
-def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
-}
-
-// Register pairs
-let SubRegIndices = [subreg_32bit, subreg_odd32] in {
-def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>;
-def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>;
-def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>;
-def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>;
-def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>;
-def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>;
-def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>;
-def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>;
-}
-
-let SubRegIndices = [subreg_even, subreg_odd],
- CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
-def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>;
-def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>;
-def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>;
-def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>;
-def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>;
-def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>;
-def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>;
-def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>;
-}
-
-// Floating-point registers
-def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
-def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
-def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
-def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
-def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
-def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
-def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
-def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
-def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
-def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
-def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
-def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
-def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
-def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
-def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
-def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
-
-let SubRegIndices = [subreg_32bit] in {
-def F0L : FPRL< 0, "f0", [F0S]>;
-def F1L : FPRL< 1, "f1", [F1S]>;
-def F2L : FPRL< 2, "f2", [F2S]>;
-def F3L : FPRL< 3, "f3", [F3S]>;
-def F4L : FPRL< 4, "f4", [F4S]>;
-def F5L : FPRL< 5, "f5", [F5S]>;
-def F6L : FPRL< 6, "f6", [F6S]>;
-def F7L : FPRL< 7, "f7", [F7S]>;
-def F8L : FPRL< 8, "f8", [F8S]>;
-def F9L : FPRL< 9, "f9", [F9S]>;
-def F10L : FPRL<10, "f10", [F10S]>;
-def F11L : FPRL<11, "f11", [F11S]>;
-def F12L : FPRL<12, "f12", [F12S]>;
-def F13L : FPRL<13, "f13", [F13S]>;
-def F14L : FPRL<14, "f14", [F14S]>;
-def F15L : FPRL<15, "f15", [F15S]>;
-}
-
-// Status register
-def PSW : SystemZReg<"psw">;
-
-/// Register classes.
-/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
-/// together with R14 and R15 in one prolog instruction.
-def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
- (sequence "R%uW", 15, 6))>;
-
-/// Registers used to generate addresses. Everything except R0.
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
-
-def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))> {
- let SubRegClasses = [(GR32 subreg_32bit)];
-}
-
-def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
- let SubRegClasses = [(ADDR32 subreg_32bit)];
-}
-
-// Even-odd register pairs
-def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
- R12P, R10P, R8P, R6P,
- R14P)> {
- let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
-}
-
-def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
- R12Q, R10Q, R8Q, R6Q,
- R14Q)> {
- let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
- (GR64 subreg_even, subreg_odd)];
-}
-
-def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
-
-def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
- let SubRegClasses = [(FP32 subreg_32bit)];
-}
-
-// Status flags registers.
-def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
- let CopyCost = -1; // Don't allow copying of status registers.
-}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
deleted file mode 100644
index 3eabcd24c598..000000000000
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "systemz-selectiondag-info"
-#include "SystemZTargetMachine.h"
-using namespace llvm;
-
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
-}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
deleted file mode 100644
index 1450401d0403..000000000000
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZSELECTIONDAGINFO_H
-#define SYSTEMZSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class SystemZTargetMachine;
-
-class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
- ~SystemZSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
deleted file mode 100644
index 0845510761c2..000000000000
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZSubtarget.h"
-#include "SystemZ.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "SystemZGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-SystemZSubtarget::SystemZSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS):
- SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "z9";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-}
-
-/// True if accessing the GV requires an extra load.
-bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
- const TargetMachine& TM,
- bool isDirectCall) const {
- if (TM.getRelocationModel() == Reloc::PIC_) {
-    // Extra load is needed for all externally visible globals.
- if (isDirectCall)
- return false;
-
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return false;
-
- return true;
- }
-
- return false;
-}
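The deleted GVRequiresExtraLoad above encodes a simple decision: under PIC, direct calls and local or hidden globals are reached directly, while everything else needs a GOT-style extra load. A hedged standalone restatement of that truth table (names invented for illustration, not part of the patch):

#include <cstdio>

// Returns true when a PIC access to a global needs an extra (GOT) load,
// following the logic of the removed SystemZSubtarget::GVRequiresExtraLoad.
static bool needsExtraLoad(bool IsPIC, bool IsDirectCall, bool IsLocalOrHidden) {
  if (!IsPIC)
    return false;
  if (IsDirectCall || IsLocalOrHidden)
    return false;
  return true; // externally visible data under PIC
}

int main() {
  std::printf("%d %d %d\n",
              needsExtraLoad(true, false, false),   // 1: external data, PIC
              needsExtraLoad(true, true, false),    // 0: direct call
              needsExtraLoad(false, false, false)); // 0: static reloc model
  return 0;
}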
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
deleted file mode 100644
index 55cfd80002bc..000000000000
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
-#define LLVM_TARGET_SystemZ_SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "SystemZGenSubtargetInfo.inc"
-
-namespace llvm {
-class GlobalValue;
-class StringRef;
-class TargetMachine;
-
-class SystemZSubtarget : public SystemZGenSubtargetInfo {
- bool HasZ10Insts;
-public:
-  /// This constructor initializes the data members to match those
-  /// of the specified triple.
- ///
- SystemZSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool isZ10() const { return HasZ10Insts; }
-
- bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
-};
-} // End llvm namespace
-
-#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
deleted file mode 100644
index e390f060c9a9..000000000000
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZTargetMachine.h"
-#include "SystemZ.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-extern "C" void LLVMInitializeSystemZTarget() {
- // Register the target.
- RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
-}
-
-/// SystemZTargetMachine ctor - Create an LP64 architecture model
-///
-SystemZTargetMachine::SystemZTargetMachine(const Target &T,
- StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
- "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
-}
-
-bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // Install an instruction selector.
- PM.add(createSystemZISelDag(*this, OptLevel));
- return false;
-}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
deleted file mode 100644
index 43dce4bd148e..000000000000
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
-#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
-
-#include "SystemZInstrInfo.h"
-#include "SystemZISelLowering.h"
-#include "SystemZFrameLowering.h"
-#include "SystemZSelectionDAGInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSubtarget.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-/// SystemZTargetMachine
-///
-class SystemZTargetMachine : public LLVMTargetMachine {
- SystemZSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- SystemZInstrInfo InstrInfo;
- SystemZTargetLowering TLInfo;
- SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
-public:
- SystemZTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetData *getTargetData() const { return &DataLayout;}
- virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
-
- virtual const SystemZRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const SystemZTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-}; // SystemZTargetMachine.
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 31807081bd6e..000000000000
--- a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMSystemZInfo
- SystemZTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMSystemZInfo SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
deleted file mode 100644
index 0be80eb4e6ad..000000000000
--- a/lib/Target/SystemZ/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index bd6a6b67beb9..acb74765c193 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -125,15 +125,15 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
//===----------------------------------------------------------------------===//
/// getInt - Get an integer ignoring errors.
-static unsigned getInt(StringRef R) {
- unsigned Result = 0;
+static int getInt(StringRef R) {
+ int Result = 0;
R.getAsInteger(10, Result);
return Result;
}
-void TargetData::init(StringRef Desc) {
+void TargetData::init() {
initializeTargetDataPass(*PassRegistry::getPassRegistry());
-
+
LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
@@ -147,11 +147,19 @@ void TargetData::init(StringRef Desc) {
setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+}
+
+std::string TargetData::parseSpecifier(StringRef Desc, TargetData *td) {
+
+ if (td)
+ td->init();
while (!Desc.empty()) {
std::pair<StringRef, StringRef> Split = Desc.split('-');
@@ -169,28 +177,54 @@ void TargetData::init(StringRef Desc) {
switch (Specifier[0]) {
case 'E':
- LittleEndian = false;
+ if (td)
+ td->LittleEndian = false;
break;
case 'e':
- LittleEndian = true;
+ if (td)
+ td->LittleEndian = true;
break;
- case 'p':
+ case 'p': {
+ // Pointer size.
Split = Token.split(':');
- PointerMemSize = getInt(Split.first) / 8;
+ int PointerMemSizeBits = getInt(Split.first);
+ if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0)
+ return "invalid pointer size, must be a positive 8-bit multiple";
+ if (td)
+ td->PointerMemSize = PointerMemSizeBits / 8;
+
+ // Pointer ABI alignment.
Split = Split.second.split(':');
- PointerABIAlign = getInt(Split.first) / 8;
+ int PointerABIAlignBits = getInt(Split.first);
+ if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) {
+ return "invalid pointer ABI alignment, "
+ "must be a positive 8-bit multiple";
+ }
+ if (td)
+ td->PointerABIAlign = PointerABIAlignBits / 8;
+
+ // Pointer preferred alignment.
Split = Split.second.split(':');
- PointerPrefAlign = getInt(Split.first) / 8;
- if (PointerPrefAlign == 0)
- PointerPrefAlign = PointerABIAlign;
+ int PointerPrefAlignBits = getInt(Split.first);
+ if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) {
+ return "invalid pointer preferred alignment, "
+ "must be a positive 8-bit multiple";
+ }
+ if (td) {
+ td->PointerPrefAlign = PointerPrefAlignBits / 8;
+ if (td->PointerPrefAlign == 0)
+ td->PointerPrefAlign = td->PointerABIAlign;
+ }
break;
+ }
case 'i':
case 'v':
case 'f':
case 'a':
case 's': {
AlignTypeEnum AlignType;
- switch (Specifier[0]) {
+ char field = Specifier[0];
+ switch (field) {
default:
case 'i': AlignType = INTEGER_ALIGN; break;
case 'v': AlignType = VECTOR_ALIGN; break;
@@ -198,37 +232,66 @@ void TargetData::init(StringRef Desc) {
case 'a': AlignType = AGGREGATE_ALIGN; break;
case 's': AlignType = STACK_ALIGN; break;
}
- unsigned Size = getInt(Specifier.substr(1));
+ int Size = getInt(Specifier.substr(1));
+ if (Size < 0) {
+ return std::string("invalid ") + field + "-size field, "
+ "must be positive";
+ }
+
Split = Token.split(':');
- unsigned ABIAlign = getInt(Split.first) / 8;
+ int ABIAlignBits = getInt(Split.first);
+ if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) {
+ return std::string("invalid ") + field +"-abi-alignment field, "
+ "must be a positive 8-bit multiple";
+ }
+ unsigned ABIAlign = ABIAlignBits / 8;
Split = Split.second.split(':');
- unsigned PrefAlign = getInt(Split.first) / 8;
+
+ int PrefAlignBits = getInt(Split.first);
+ if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) {
+ return std::string("invalid ") + field +"-preferred-alignment field, "
+ "must be a positive 8-bit multiple";
+ }
+ unsigned PrefAlign = PrefAlignBits / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
- setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+
+ if (td)
+ td->setAlignment(AlignType, ABIAlign, PrefAlign, Size);
break;
}
case 'n': // Native integer types.
Specifier = Specifier.substr(1);
do {
- if (unsigned Width = getInt(Specifier))
- LegalIntWidths.push_back(Width);
+ int Width = getInt(Specifier);
+ if (Width <= 0) {
+ return std::string("invalid native integer size \'") + Specifier.str() +
+ "\', must be a positive integer.";
+ }
+ if (td && Width != 0)
+ td->LegalIntWidths.push_back(Width);
Split = Token.split(':');
Specifier = Split.first;
Token = Split.second;
} while (!Specifier.empty() || !Token.empty());
break;
- case 'S': // Stack natural alignment.
- StackNaturalAlign = getInt(Specifier.substr(1));
- StackNaturalAlign /= 8;
-      // FIXME: Should we really be truncating these alignments and
- // sizes silently?
+ case 'S': { // Stack natural alignment.
+ int StackNaturalAlignBits = getInt(Specifier.substr(1));
+ if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) {
+ return "invalid natural stack alignment (S-field), "
+ "must be a positive 8-bit multiple";
+ }
+ if (td)
+ td->StackNaturalAlign = StackNaturalAlignBits / 8;
break;
+ }
default:
break;
}
}
+
+ return "";
}
/// Default ctor.
@@ -242,7 +305,9 @@ TargetData::TargetData() : ImmutablePass(ID) {
TargetData::TargetData(const Module *M)
: ImmutablePass(ID) {
- init(M->getDataLayout());
+ std::string errMsg = parseSpecifier(M->getDataLayout(), this);
+ assert(errMsg == "" && "Module M has malformed target data layout string.");
+ (void)errMsg;
}
void
@@ -308,7 +373,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
// If the alignment is not a power of 2, round up to the next power of 2.
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1))
- Align = llvm::NextPowerOf2(Align);
+ Align = NextPowerOf2(Align);
return Align;
}
}
@@ -414,6 +479,8 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
return cast<IntegerType>(Ty)->getBitWidth();
case Type::VoidTyID:
return 8;
+ case Type::HalfTyID:
+ return 16;
case Type::FloatTyID:
return 32;
case Type::DoubleTyID:
@@ -430,9 +497,7 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
return cast<VectorType>(Ty)->getBitWidth();
default:
llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
- break;
}
- return 0;
}
/*!
@@ -471,6 +536,7 @@ unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
case Type::VoidTyID:
AlignType = INTEGER_ALIGN;
break;
+ case Type::HalfTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
// PPC_FP128TyID and FP128TyID have different data contents, but the
@@ -486,7 +552,6 @@ unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
break;
default:
llvm_unreachable("Bad type for getAlignment!!!");
- break;
}
return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
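The parseSpecifier rework above turns silent truncation into explicit errors: every size and alignment field must be a non-negative multiple of 8 bits before it is divided down to bytes. A small hedged sketch of that validation rule as it would apply to a pointer spec such as "p:64:64:64"; this is a standalone illustration, not the LLVM API.

#include <cstdio>
#include <string>

// Mirrors the "must be a positive 8-bit multiple" checks added to
// TargetData::parseSpecifier; returns an error string, or "" on success.
static std::string checkBitField(const char *What, int Bits) {
  if (Bits < 0 || Bits % 8 != 0)
    return std::string("invalid ") + What + ", must be a positive 8-bit multiple";
  return "";
}

int main() {
  std::printf("[%s]\n", checkBitField("pointer size", 64).c_str()); // ok -> ""
  std::printf("[%s]\n", checkBitField("pointer size", 30).c_str()); // error
  return 0;
}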
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d52ecb32cf75..440f9ad00de9 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {
@@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
-bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp
new file mode 100644
index 000000000000..aafedf8749b1
--- /dev/null
+++ b/lib/Target/TargetJITInfo.cpp
@@ -0,0 +1,14 @@
+//===- Target/TargetJITInfo.cpp - Target Information for JIT ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetJITInfo.h"
+
+using namespace llvm;
+
+void TargetJITInfo::anchor() { }
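The new TargetJITInfo.cpp exists only to give the class an out-of-line virtual method, the usual LLVM trick for pinning the vtable to a single object file. A hedged, generic illustration of the idiom with unrelated names (not LLVM code):

// Header: declare one out-of-line virtual "anchor".
struct Widget {
  virtual ~Widget() {}
  virtual void anchor(); // defined in exactly one .cpp
};

// Widget.cpp: this single definition makes the compiler emit Widget's
// vtable (and RTTI) here instead of weakly in every translation unit.
void Widget::anchor() {}

int main() { Widget W; (void)W; return 0; }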
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 709dfd283f98..269958fd7f17 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -20,6 +20,106 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
"Target Library Information", false, true)
char TargetLibraryInfo::ID = 0;
+void TargetLibraryInfo::anchor() { }
+
+const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
+ {
+ "acos",
+ "acosl",
+ "acosf",
+ "asin",
+ "asinl",
+ "asinf",
+ "atan",
+ "atanl",
+ "atanf",
+ "atan2",
+ "atan2l",
+ "atan2f",
+ "ceil",
+ "ceill",
+ "ceilf",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosl",
+ "cosf",
+ "cosh",
+ "coshl",
+ "coshf",
+ "exp",
+ "expl",
+ "expf",
+ "exp2",
+ "exp2l",
+ "exp2f",
+ "expm1",
+ "expm1l",
+ "expl1f",
+ "fabs",
+ "fabsl",
+ "fabsf",
+ "floor",
+ "floorl",
+ "floorf",
+ "fiprintf",
+ "fmod",
+ "fmodl",
+ "fmodf",
+ "fputs",
+ "fwrite",
+ "iprintf",
+ "log",
+ "logl",
+ "logf",
+ "log2",
+ "log2l",
+ "log2f",
+ "log10",
+ "log10l",
+ "log10f",
+ "log1p",
+ "log1pl",
+ "log1pf",
+ "memcpy",
+ "memmove",
+ "memset",
+ "memset_pattern16",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "pow",
+ "powf",
+ "powl",
+ "rint",
+ "rintf",
+ "rintl",
+ "sin",
+ "sinl",
+ "sinf",
+ "sinh",
+ "sinhl",
+ "sinhf",
+ "siprintf",
+ "sqrt",
+ "sqrtl",
+ "sqrtf",
+ "tan",
+ "tanl",
+ "tanf",
+ "tanh",
+ "tanhl",
+ "tanhf",
+ "trunc",
+ "truncf",
+ "truncl",
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release"
+ };
+
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@@ -38,6 +138,17 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
// iprintf and friends are only available on XCore and TCE.
if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
TLI.setUnavailable(LibFunc::iprintf);
@@ -64,6 +175,7 @@ TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
: ImmutablePass(ID) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ CustomNames = TLI.CustomNames;
}
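setAvailableWithName, used above for fwrite$UNIX2003 and fputs$UNIX2003, records a replacement symbol name alongside the availability bit, which is why the copy constructor now also copies CustomNames. A hedged sketch of that lookup pattern using a plain std::map rather than the real TargetLibraryInfo internals:

#include <cstdio>
#include <map>
#include <string>

int main() {
  // Availability plus an optional custom symbol name per library function.
  std::map<std::string, std::string> CustomNames;
  CustomNames["fwrite"] = "fwrite$UNIX2003"; // assumed target quirk

  auto I = CustomNames.find("fwrite");
  const std::string Name = (I != CustomNames.end()) ? I->second : "fwrite";
  std::printf("emit call to %s\n", Name.c_str());
  return 0;
}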
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 56b7b69de0bd..2570e0d0972b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -28,7 +28,6 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -48,7 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
-static bool isSuitableForBSS(const GlobalVariable *GV) {
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
const Constant *C = GV->getInitializer();
// Must have zero initializer.
@@ -73,31 +72,27 @@ static bool isSuitableForBSS(const GlobalVariable *GV) {
/// IsNullTerminatedString - Return true if the specified constant (which is
/// known to have a type that is an array of 1/2/4 byte elements) ends with a
-/// nul value and contains no other nuls in it.
+/// nul value and contains no other nuls in it. Note that this is more general
+/// than ConstantDataSequential::isString because we allow 2 & 4 byte strings.
static bool IsNullTerminatedString(const Constant *C) {
- ArrayType *ATy = cast<ArrayType>(C->getType());
-
-  // First check: do we have a constant array of i8 terminated with zero?
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
- if (ATy->getNumElements() == 0) return false;
-
- ConstantInt *Null =
- dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
- if (Null == 0 || !Null->isZero())
+  // First check: do we have a constant array terminated with zero?
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ unsigned NumElts = CDS->getNumElements();
+ assert(NumElts != 0 && "Can't have an empty CDS");
+
+ if (CDS->getElementAsInteger(NumElts-1) != 0)
return false; // Not null terminated.
-
+
// Verify that the null doesn't occur anywhere else in the string.
- for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
- // Reject constantexpr elements etc.
- if (!isa<ConstantInt>(CVA->getOperand(i)) ||
- CVA->getOperand(i) == Null)
+ for (unsigned i = 0; i != NumElts-1; ++i)
+ if (CDS->getElementAsInteger(i) == 0)
return false;
return true;
}
// Another possibility: [1 x i8] zeroinitializer
if (isa<ConstantAggregateZero>(C))
- return ATy->getNumElements() == 1;
+ return cast<ArrayType>(C->getType())->getNumElements() == 1;
return false;
}
@@ -133,7 +128,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Handle thread-local data first.
if (GVar->isThreadLocal()) {
- if (isSuitableForBSS(GVar))
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
return SectionKind::getThreadBSS();
return SectionKind::getThreadData();
}
@@ -143,7 +138,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getCommon();
// Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
if (GVar->hasLocalLinkage())
return SectionKind::getBSSLocal();
else if (GVar->hasExternalLinkage())
@@ -160,7 +155,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
   // relocation, then we may have to drop this into a writable data section
// even though it is marked const.
switch (C->getRelocationInfo()) {
- default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
// If the global is required to have a unique address, it can't be put
// into a mergable section: just drop it into the general read-only
@@ -234,7 +228,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getDataNoRel();
switch (C->getRelocationInfo()) {
- default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
return SectionKind::getDataNoRel();
case Constant::LocalRelocation:
@@ -242,6 +235,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
case Constant::GlobalRelocations:
return SectionKind::getDataRel();
}
+ llvm_unreachable("Invalid relocation");
}
/// SectionForGlobal - This method computes the appropriate section to emit
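The rewritten IsNullTerminatedString above checks two things about the element values: the last element is zero and no earlier element is. A hedged standalone version of the same predicate over a plain array (not the ConstantDataSequential API):

#include <cstdint>
#include <cstdio>
#include <vector>

// True if the array ends with exactly one terminating zero element.
static bool isNullTerminated(const std::vector<uint64_t> &Elts) {
  if (Elts.empty() || Elts.back() != 0)
    return false;
  for (size_t i = 0; i + 1 < Elts.size(); ++i)
    if (Elts[i] == 0)
      return false; // embedded nul
  return true;
}

int main() {
  std::printf("%d %d\n", isNullTerminated({'h', 'i', 0}),
              isNullTerminated({'h', 0, 'i', 0}));
  return 0;
}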
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index fe8a7cebd0a0..b9b2526876fd 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -24,153 +23,10 @@ using namespace llvm;
//
namespace llvm {
- bool LessPreciseFPMADOption;
- bool PrintMachineCode;
- bool NoFramePointerElim;
- bool NoFramePointerElimNonLeaf;
- bool NoExcessFPPrecision;
- bool UnsafeFPMath;
- bool NoInfsFPMath;
- bool NoNaNsFPMath;
- bool HonorSignDependentRoundingFPMathOption;
- bool UseSoftFloat;
- FloatABI::ABIType FloatABIType;
- bool NoImplicitFloat;
- bool NoZerosInBSS;
- bool JITExceptionHandling;
- bool JITEmitDebugInfo;
- bool JITEmitDebugInfoToDisk;
- bool GuaranteedTailCallOpt;
- unsigned StackAlignmentOverride;
- bool RealignStack;
- bool DisableJumpTables;
- bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
- bool EnableSegmentedStacks;
}
-static cl::opt<bool, true>
-PrintCode("print-machineinstrs",
- cl::desc("Print generated machine code"),
- cl::location(PrintMachineCode), cl::init(false));
-static cl::opt<bool, true>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::location(NoFramePointerElim),
- cl::init(false));
-static cl::opt<bool, true>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::location(NoFramePointerElimNonLeaf),
- cl::init(false));
-static cl::opt<bool, true>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
- cl::location(NoExcessFPPrecision),
- cl::init(false));
-static cl::opt<bool, true>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::location(LessPreciseFPMADOption),
- cl::init(false));
-static cl::opt<bool, true>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::location(UnsafeFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::location(NoInfsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::location(NoNaNsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::location(HonorSignDependentRoundingFPMathOption),
- cl::init(false));
-static cl::opt<bool, true>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::location(UseSoftFloat),
- cl::init(false));
-static cl::opt<llvm::FloatABI::ABIType, true>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::location(FloatABIType),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-static cl::opt<bool, true>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::location(NoZerosInBSS),
- cl::init(false));
-static cl::opt<bool, true>
-EnableJITExceptionHandling("jit-enable-eh",
- cl::desc("Emit exception handling information"),
- cl::location(JITExceptionHandling),
- cl::init(false));
-// In debug builds, make this default to true.
-#ifdef NDEBUG
-#define EMIT_DEBUG false
-#else
-#define EMIT_DEBUG true
-#endif
-static cl::opt<bool, true>
-EmitJitDebugInfo("jit-emit-debug",
- cl::desc("Emit debug information to debugger"),
- cl::location(JITEmitDebugInfo),
- cl::init(EMIT_DEBUG));
-#undef EMIT_DEBUG
-static cl::opt<bool, true>
-EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
- cl::Hidden,
- cl::desc("Emit debug info objfiles to disk"),
- cl::location(JITEmitDebugInfoToDisk),
- cl::init(false));
-
-static cl::opt<bool, true>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::location(GuaranteedTailCallOpt),
- cl::init(false));
-static cl::opt<unsigned, true>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::location(StackAlignmentOverride),
- cl::init(0));
-static cl::opt<bool, true>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::location(RealignStack),
- cl::init(true));
-static cl::opt<bool, true>
-DisableSwitchTables(cl::Hidden, "disable-jump-tables",
- cl::desc("Do not generate jump tables."),
- cl::location(DisableJumpTables),
- cl::init(false));
-static cl::opt<bool, true>
-EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
- cl::desc("Use strong PHI elimination."),
- cl::location(StrongPHIElim),
- cl::init(false));
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -179,29 +35,23 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
-static cl::opt<bool, true>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::location(EnableSegmentedStacks),
- cl::init(false));
-
+
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS)
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
MCSaveTempLabels(false),
MCUseLoc(true),
- MCUseCFI(true) {
- // Typically it will be subtargets that will adjust FloatABIType from Default
- // to Soft or Hard.
- if (UseSoftFloat)
- FloatABIType = FloatABI::Soft;
+ MCUseCFI(true),
+ MCUseDwarfDirectory(false),
+ Options(Options) {
}
TargetMachine::~TargetMachine() {
@@ -225,6 +75,35 @@ CodeModel::Model TargetMachine::getCodeModel() const {
return CodeGenInfo->getCodeModel();
}
+TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (getRelocationModel() == Reloc::PIC_ &&
+ !Options.PositionIndependentExecutable) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+
+/// getOptLevel - Returns the optimization level: None, Less,
+/// Default, or Aggressive.
+CodeGenOpt::Level TargetMachine::getOptLevel() const {
+ if (!CodeGenInfo)
+ return CodeGenOpt::Default;
+ return CodeGenInfo->getOptLevel();
+}
+
bool TargetMachine::getAsmVerbosityDefault() {
return AsmVerbosityDefault;
}
@@ -249,36 +128,3 @@ void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-namespace llvm {
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- bool DisableFramePointerElim(const MachineFunction &MF) {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
-
- return NoFramePointerElim;
- }
-
-  /// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
-  /// is specified on the command line. When this flag is off (default), the
- /// code generator is not allowed to generate mad (multiply add) if the
- /// result is "less precise" than doing those operations individually.
- bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
-
- /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
- /// that the rounding mode of the FPU can change from its default.
- bool HonorSignDependentRoundingFPMath() {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
- }
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- StringRef getTrapFunctionName() {
- return TrapFuncName;
- }
-}
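The new getTLSModel above reduces to a two-by-two decision: PIC (and not a position-independent executable) selects the dynamic models, otherwise the exec models; local or hidden symbols, or symbols defined in this module, get the cheaper variant. A hedged restatement as a standalone function, with the enum and parameter names invented for illustration:

#include <cstdio>

enum TLSKind { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

// Mirrors the branch structure of TargetMachine::getTLSModel.
static TLSKind pickTLSModel(bool IsPIC, bool IsPIE, bool IsLocal, bool IsHidden,
                            bool IsDeclaration) {
  if (IsPIC && !IsPIE)
    return (IsLocal || IsHidden) ? LocalDynamic : GeneralDynamic;
  return (!IsDeclaration || IsHidden) ? LocalExec : InitialExec;
}

int main() {
  std::printf("%d %d\n",
              pickTLSModel(true, false, false, false, true),    // GeneralDynamic
              pickTLSModel(false, false, false, false, false)); // LocalExec
  return 0;
}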
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
new file mode 100644
index 000000000000..d6bba8b0dd05
--- /dev/null
+++ b/lib/Target/TargetMachineC.cpp
@@ -0,0 +1,197 @@
+//===-- TargetMachineC.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM-C part of TargetMachine.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+
+
+LLVMTargetRef LLVMGetFirstTarget() {
+ const Target* target = &*TargetRegistry::begin();
+ return wrap(target);
+}
+LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
+ return wrap(unwrap(T)->getNext());
+}
+
+const char * LLVMGetTargetName(LLVMTargetRef T) {
+ return unwrap(T)->getName();
+}
+
+const char * LLVMGetTargetDescription(LLVMTargetRef T) {
+ return unwrap(T)->getShortDescription();
+}
+
+LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) {
+ return unwrap(T)->hasJIT();
+}
+
+LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) {
+ return unwrap(T)->hasTargetMachine();
+}
+
+LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
+ return unwrap(T)->hasMCAsmBackend();
+}
+
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
+ char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel) {
+ Reloc::Model RM;
+ switch (Reloc){
+ case LLVMRelocStatic:
+ RM = Reloc::Static;
+ break;
+ case LLVMRelocPIC:
+ RM = Reloc::PIC_;
+ break;
+ case LLVMRelocDynamicNoPic:
+ RM = Reloc::DynamicNoPIC;
+ break;
+ default:
+ RM = Reloc::Default;
+ break;
+ }
+
+ CodeModel::Model CM;
+ switch (CodeModel) {
+ case LLVMCodeModelJITDefault:
+ CM = CodeModel::JITDefault;
+ break;
+ case LLVMCodeModelSmall:
+ CM = CodeModel::Small;
+ break;
+ case LLVMCodeModelKernel:
+ CM = CodeModel::Kernel;
+ break;
+ case LLVMCodeModelMedium:
+ CM = CodeModel::Medium;
+ break;
+ case LLVMCodeModelLarge:
+ CM = CodeModel::Large;
+ break;
+ default:
+ CM = CodeModel::Default;
+ break;
+ }
+ CodeGenOpt::Level OL;
+
+ switch (Level) {
+ case LLVMCodeGenLevelNone:
+ OL = CodeGenOpt::None;
+ break;
+ case LLVMCodeGenLevelLess:
+ OL = CodeGenOpt::Less;
+ break;
+ case LLVMCodeGenLevelAggressive:
+ OL = CodeGenOpt::Aggressive;
+ break;
+ default:
+ OL = CodeGenOpt::Default;
+ break;
+ }
+
+ TargetOptions opt;
+ return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
+ CM, OL));
+}
+
+
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) {
+ delete unwrap(T);
+}
+
+LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
+ const Target* target = &(unwrap(T)->getTarget());
+ return wrap(target);
+}
+
+char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetTriple();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetCPU();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetFeatureString();
+ return strdup(StringRep.c_str());
+}
+
+LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
+ return wrap(unwrap(T)->getTargetData());
+}
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+ char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+ TargetMachine* TM = unwrap(T);
+ Module* Mod = unwrap(M);
+
+ PassManager pass;
+
+ std::string error;
+
+ const TargetData* td = TM->getTargetData();
+
+ if (!td) {
+ error = "No TargetData in TargetMachine";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ pass.add(new TargetData(*td));
+
+ TargetMachine::CodeGenFileType ft;
+ switch (codegen) {
+ case LLVMAssemblyFile:
+ ft = TargetMachine::CGFT_AssemblyFile;
+ break;
+ default:
+ ft = TargetMachine::CGFT_ObjectFile;
+ break;
+ }
+ raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+ formatted_raw_ostream destf(dest);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ if (TM->addPassesToEmitFile(pass, destf, ft)) {
+ error = "No TargetData in TargetMachine";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ pass.run(*Mod);
+
+ destf.flush();
+ dest.flush();
+ return false;
+}
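The new TargetMachineC.cpp exposes the target registry and TargetMachine through the C API. A minimal hedged usage sketch that sticks to functions defined in this file plus the LLVMInitializeAllTargetInfos/LLVMInitializeAllTargets helpers from llvm-c/Target.h (assumed to be available in the build); error handling is omitted:

#include "llvm-c/Target.h"
#include "llvm-c/TargetMachine.h"
#include <cstdio>

int main() {
  LLVMInitializeAllTargetInfos(); // register every configured target
  LLVMInitializeAllTargets();

  // Walk the registry the same way the C API above does.
  for (LLVMTargetRef T = LLVMGetFirstTarget(); T; T = LLVMGetNextTarget(T))
    std::printf("%-12s %s (JIT: %d)\n", LLVMGetTargetName(T),
                LLVMGetTargetDescription(T), (int)LLVMTargetHasJIT(T));
  return 0;
}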
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 67239b830eb5..1716423eeeac 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,8 +13,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,7 +71,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
- ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
for (unsigned i = 0; i != Order.size(); ++i)
R.set(Order[i]);
}
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 94aca7abb2bd..47489bb06c4e 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser
X86AsmParser.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmParser
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMX86Desc
- LLVMX86Info
- )
-
add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..9f94d5d38864
--- /dev/null
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86AsmParser
+parent = X86
+required_libraries = MC MCParser Support X86Desc X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 1eaccff58a9d..2794e60df238 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -14,7 +14,6 @@
#include "llvm/MC/MCTargetAsmLexer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
@@ -144,11 +143,7 @@ AsmToken X86AsmLexer::LexTokenIntel() {
SetError(Lexer->getErrLoc(), Lexer->getErr());
return lexedToken;
case AsmToken::Identifier: {
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
-
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lexedToken.getString().lower());
if (regID)
return AsmToken(AsmToken::Register,
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index cb4f15ffed3e..08c732c3886e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -17,10 +17,8 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
@@ -32,33 +30,47 @@ using namespace llvm;
namespace {
struct X86Operand;
-class X86ATTAsmParser : public MCTargetAsmParser {
+class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
-
private:
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ return Parser.Error(L, Msg, Ranges);
+ }
+
+ X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
+ Error(Loc, Msg);
+ return 0;
+ }
X86Operand *ParseOperand();
+ X86Operand *ParseATTOperand();
+ X86Operand *ParseIntelOperand();
+ X86Operand *ParseIntelMemOperand();
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
+ bool processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
- /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
- /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
- /// or %es:(%edi) in 32bit mode.
+ /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
bool is64BitMode() const {
@@ -79,7 +91,7 @@ private:
/// }
public:
- X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
@@ -91,6 +103,10 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
+
+ bool isParsingIntelSyntax() {
+ return getParser().getAssemblerDialect();
+ }
};
} // end anonymous namespace
@@ -101,6 +117,31 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
+static bool isImmSExti16i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti32i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmZExtu32u8Value(uint64_t Value) {
+ return (Value <= 0x00000000000000FFULL);
+}
+
+static bool isImmSExti64i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti64i32Value(uint64_t Value) {
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
namespace {
/// X86Operand - Instances of this class represent a parsed X86 machine
@@ -135,6 +176,7 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned BaseReg;
unsigned IndexReg;
unsigned Scale;
+ unsigned Size;
} Mem;
};
@@ -145,6 +187,8 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@@ -205,10 +249,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti16i8Value(CE->getValue());
}
bool isImmSExti32i8() const {
if (!isImm())
@@ -222,10 +263,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti32i8Value(CE->getValue());
}
bool isImmZExtu32u8() const {
if (!isImm())
@@ -239,8 +277,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (Value <= 0x00000000000000FFULL);
+ return isImmZExtu32u8Value(CE->getValue());
}
bool isImmSExti64i8() const {
if (!isImm())
@@ -254,9 +291,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti64i8Value(CE->getValue());
}
bool isImmSExti64i32() const {
if (!isImm())
@@ -270,12 +305,31 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000007FFFFFFFULL)||
- (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti64i32Value(CE->getValue());
}
bool isMem() const { return Kind == Memory; }
+ bool isMem8() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMem16() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMem32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMem64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64);
+ }
+ bool isMem80() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 80);
+ }
+ bool isMem128() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 128);
+ }
+ bool isMem256() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 256);
+ }
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
@@ -302,6 +356,28 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -313,11 +389,16 @@ struct X86Operand : public MCParsedAsmOperand {
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
- X86Operand *Res = new X86Operand(Token, Loc, Loc);
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
+ X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
@@ -337,20 +418,22 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
- SMLoc EndLoc) {
+ SMLoc EndLoc, unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
+ Res->Mem.Size = Size;
return Res;
}
/// Create a generalized memory operand.
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0) {
    // We should never just have a displacement; that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -364,13 +447,14 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.BaseReg = BaseReg;
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
+ Res->Mem.Size = Size;
return Res;
}
};
} // end anonymous namespace.
-bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+bool X86AsmParser::isSrcOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
return (Op.isMem() &&
@@ -380,32 +464,38 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
}
-bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ return Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
}
-bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
- SMLoc &StartLoc, SMLoc &EndLoc) {
+bool X86AsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
RegNo = 0;
- const AsmToken &TokPercent = Parser.getTok();
- assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
- StartLoc = TokPercent.getLoc();
- Parser.Lex(); // Eat percent token.
+ if (!isParsingIntelSyntax()) {
+ const AsmToken &TokPercent = Parser.getTok();
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ StartLoc = TokPercent.getLoc();
+ Parser.Lex(); // Eat percent token.
+ }
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier))
- return Error(Tok.getLoc(), "invalid register name");
+ if (Tok.isNot(AsmToken::Identifier)) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, Tok.getEndLoc()));
+ }
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
- RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+ RegNo = MatchRegisterName(Tok.getString().lower());
if (!is64BitMode()) {
// FIXME: This should be done using Requires<In32BitMode> and
@@ -417,8 +507,9 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo))
- return Error(Tok.getLoc(), "register %"
- + Tok.getString() + " is only available in 64-bit mode");
+ return Error(StartLoc, "register %"
+ + Tok.getString() + " is only available in 64-bit mode",
+ SMRange(StartLoc, Tok.getEndLoc()));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -478,15 +569,182 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
}
}
- if (RegNo == 0)
- return Error(Tok.getLoc(), "invalid register name");
+ if (RegNo == 0) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, Tok.getEndLoc()));
+ }
- EndLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
Parser.Lex(); // Eat identifier token.
return false;
}
-X86Operand *X86ATTAsmParser::ParseOperand() {
+X86Operand *X86AsmParser::ParseOperand() {
+ if (isParsingIntelSyntax())
+ return ParseIntelOperand();
+ return ParseATTOperand();
+}
+
+/// getIntelMemOperandSize - Return the Intel memory operand size in bits.
+static unsigned getIntelMemOperandSize(StringRef OpStr) {
+ unsigned Size = 0;
+ if (OpStr == "BYTE") Size = 8;
+ if (OpStr == "WORD") Size = 16;
+ if (OpStr == "DWORD") Size = 32;
+ if (OpStr == "QWORD") Size = 64;
+ if (OpStr == "XWORD") Size = 80;
+ if (OpStr == "XMMWORD") Size = 128;
+ if (OpStr == "YMMWORD") Size = 256;
+ return Size;
+}
+
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ unsigned Size) {
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc Start = Parser.getTok().getLoc(), End;
+
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
+
+ // Eat '['
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ Parser.Lex();
+
+ if (getLexer().is(AsmToken::Identifier)) {
+ // Parse BaseReg
+ if (ParseRegister(BaseReg, Start, End)) {
+ // Handle '[' 'symbol' ']'
+ if (getParser().ParseExpression(Disp, End)) return 0;
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(Start, "Expected ']' token!");
+ Parser.Lex();
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ }
+ } else if (getLexer().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ Parser.Lex();
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getLexer().is(AsmToken::RBrac)) {
+ // Handle '[' number ']'
+ Parser.Lex();
+ const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
+ if (SegReg)
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
+ Start, End, Size);
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ } else if (getLexer().is(AsmToken::Star)) {
+ // Handle '[' Scale*IndexReg ']'
+ Parser.Lex();
+ SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
+ Scale = Val;
+ } else
+      return ErrorOperand(Loc, "Unexpected token");
+ }
+
+ if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
+ bool isPlus = getLexer().is(AsmToken::Plus);
+ Parser.Lex();
+ SMLoc PlusLoc = Parser.getTok().getLoc();
+ if (getLexer().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Star)) {
+ Parser.Lex();
+ SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
+ Scale = Val;
+ } else if (getLexer().is(AsmToken::RBrac)) {
+ const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
+ Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
+ } else
+ return ErrorOperand(PlusLoc, "unexpected token after +");
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ // This could be an index register or a displacement expression.
+ End = Parser.getTok().getLoc();
+ if (!IndexReg)
+ ParseRegister(IndexReg, Start, End);
+ else if (getParser().ParseExpression(Disp, End)) return 0;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ if (getParser().ParseExpression(Disp, End)) return 0;
+
+ End = Parser.getTok().getLoc();
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(End, "expected ']' token!");
+ Parser.Lex();
+ End = Parser.getTok().getLoc();
+
+ // handle [-42]
+ if (!BaseReg && !IndexReg)
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ Start, End, Size);
+}
+
+/// ParseIntelMemOperand - Parse an Intel-style memory operand.
+X86Operand *X86AsmParser::ParseIntelMemOperand() {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ unsigned SegReg = 0;
+
+ unsigned Size = getIntelMemOperandSize(Tok.getString());
+ if (Size) {
+ Parser.Lex();
+ assert (Tok.getString() == "PTR" && "Unexpected token!");
+ Parser.Lex();
+ }
+
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(SegReg, Size);
+
+ if (!ParseRegister(SegReg, Start, End)) {
+    // Handle SegReg : [ ... ]
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Start, "Expected ':' token!");
+ Parser.Lex(); // Eat :
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, Size);
+ }
+
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getParser().ParseExpression(Disp, End)) return 0;
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+}
+
+X86Operand *X86AsmParser::ParseIntelOperand() {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+
+ // immediate.
+ if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
+ getLexer().is(AsmToken::Minus)) {
+ const MCExpr *Val;
+ if (!getParser().ParseExpression(Val, End)) {
+ End = Parser.getTok().getLoc();
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+
+ // register
+ unsigned RegNo = 0;
+ if (!ParseRegister(RegNo, Start, End)) {
+ End = Parser.getTok().getLoc();
+ return X86Operand::CreateReg(RegNo, Start, End);
+ }
+
+ // mem operand
+ return ParseIntelMemOperand();
+}
+
+X86Operand *X86AsmParser::ParseATTOperand() {
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -497,7 +755,8 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "%eiz and %riz can only be used as index registers");
+ Error(Start, "%eiz and %riz can only be used as index registers",
+ SMRange(Start, End));
return 0;
}
@@ -524,7 +783,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
-X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
@@ -579,18 +838,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc IndexLoc;
if (getLexer().is(AsmToken::Percent)) {
- SMLoc L;
- if (ParseRegister(BaseReg, L, L)) return 0;
+ SMLoc StartLoc, EndLoc;
+ if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
- Error(L, "eiz and riz can only be used as index registers");
+ Error(StartLoc, "eiz and riz can only be used as index registers",
+ SMRange(StartLoc, EndLoc));
return 0;
}
}
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
+ IndexLoc = Parser.getTok().getLoc();
// Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
@@ -616,8 +878,10 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal))
+ if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ Error(Loc, "expected scale expression");
return 0;
+ }
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -650,11 +914,28 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
+ // If we have both a base register and an index register make sure they are
+ // both 64-bit or 32-bit registers.
+ if (BaseReg != 0 && IndexReg != 0) {
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ IndexReg != X86::RIZ) {
+ Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+ return 0;
+ }
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ IndexReg != X86::EIZ){
+ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+ return 0;
+ }
+ }
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
-bool X86ATTAsmParser::
+bool X86AsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
StringRef PatchedName = Name;
@@ -669,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
- bool IsVCMP = PatchedName.startswith("vcmp");
+ bool IsVCMP = PatchedName[0] == 'v';
unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
- .Case("eq_uq", 8)
- .Case("nge", 9)
+ .Case("eq", 0x00)
+ .Case("lt", 0x01)
+ .Case("le", 0x02)
+ .Case("unord", 0x03)
+ .Case("neq", 0x04)
+ .Case("nlt", 0x05)
+ .Case("nle", 0x06)
+ .Case("ord", 0x07)
+ /* AVX only from here */
+ .Case("eq_uq", 0x08)
+ .Case("nge", 0x09)
.Case("ngt", 0x0A)
.Case("false", 0x0B)
.Case("neq_oq", 0x0C)
@@ -706,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (SSEComparisonCode != ~0U) {
+ if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
@@ -724,10 +1006,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
- if (ExtraImmOp)
+ if (ExtraImmOp && !isParsingIntelSyntax())
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
-
// Determine whether this is an instruction prefix.
bool isPrefix =
Name == "lock" || Name == "rep" ||
@@ -781,6 +1062,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
else if (isPrefix && getLexer().is(AsmToken::Slash))
Parser.Lex(); // Consume the prefix separator Slash
+ if (ExtraImmOp && isParsingIntelSyntax())
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -916,11 +1200,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Name.startswith("rcl") || Name.startswith("rcr") ||
Name.startswith("rol") || Name.startswith("ror")) &&
Operands.size() == 3) {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ if (isParsingIntelSyntax()) {
+ // Intel syntax
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[2];
+ Operands.pop_back();
+ }
+ } else {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
}
}
@@ -939,7 +1233,246 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86ATTAsmParser::
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::AND32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::AND64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ }
+}
+
+bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
@@ -957,6 +1490,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
const char *Repl =
@@ -980,9 +1514,17 @@ MatchAndEmitInstruction(SMLoc IDLoc,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
+ isParsingIntelSyntax())) {
default: break;
case Match_Success:
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other.
+ while (processInstruction(Inst, Operands))
+ ;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -1040,6 +1582,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match1 == Match_Success) + (Match2 == Match_Success) +
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
}
@@ -1078,21 +1621,24 @@ MatchAndEmitInstruction(SMLoc IDLoc,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
- return true;
+ return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
+ Op->getLocRange());
}
// Recover location info for the operand if we know which was the problem.
- SMLoc ErrorLoc = IDLoc;
if (OrigErrorInfo != ~0U) {
if (OrigErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+ if (Operand->getStartLoc().isValid()) {
+ SMRange OperandRange = Operand->getLocRange();
+ return Error(Operand->getStartLoc(), "invalid operand for instruction",
+ OperandRange);
+ }
}
- return Error(ErrorLoc, "invalid operand for instruction");
+ return Error(IDLoc, "invalid operand for instruction");
}
// If one instruction matched with a missing feature, report this as a
@@ -1112,24 +1658,34 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// If all of these were an outright failure, report it in a useless way.
- // FIXME: We should give nicer diagnostics about the exact failure.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
return true;
}
-bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
+ else if (IDVal.startswith(".intel_syntax")) {
+ getParser().setAssemblerDialect(1);
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if(Parser.getTok().getString() == "noprefix") {
+ // FIXME : Handle noprefix
+ Parser.Lex();
+ } else
+ return true;
+ }
+ return false;
+ }
return true;
}
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -1154,7 +1710,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveCode
/// ::= .code32 | .code64
-bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
if (IDVal == ".code32") {
Parser.Lex();
if (is64BitMode()) {
@@ -1179,8 +1735,8 @@ extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target);
- RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
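Two pieces of the rewrite above are worth connecting. The isImmSExti*Value helpers classify the raw 64-bit pattern an immediate was parsed into, accepting both the sign-extended and the width-truncated encodings of the same negative number; processInstruction() then uses them to rewrite forms such as AND16i16 into the shorter AND16ri8 encoding whenever the immediate fits in a sign-extended 8-bit field. A few illustrative values, not taken from the patch, showing what isImmSExti16i8Value accepts:

    // Values are illustrative only. A 16-bit -1 can reach the parser either as
    // 0xFFFFFFFFFFFFFFFF (sign-extended to 64 bits) or as 0x000000000000FFFF
    // (only the low 16 bits set); both must be treated as fitting an imm8.
    assert(isImmSExti16i8Value(0x000000000000007FULL));   // +127: fits imm8
    assert(isImmSExti16i8Value(0x000000000000FF80ULL));   // -128 as a 16-bit pattern
    assert(isImmSExti16i8Value(0xFFFFFFFFFFFFFFFFULL));   // -1, sign-extended form
    assert(!isImmSExti16i8Value(0x0000000000000080ULL));  // +128: needs the full imm16

So, for example, "and $1, %ax" matches AND16i16 first, and processInstruction() then swaps in AND16ri8 because the constant 1 passes the check above.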
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 351e7675a7e8..f612e2365ec3 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -1,16 +1,16 @@
set(LLVM_TARGET_DEFINITIONS X86.td)
-llvm_tablegen(X86GenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
-llvm_tablegen(X86GenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(X86GenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
-llvm_tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(X86GenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(X86GenFastISel.inc -gen-fast-isel)
-llvm_tablegen(X86GenCallingConv.inc -gen-callingconv)
-llvm_tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM X86GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
+tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(X86CommonTableGen)
set(sources
@@ -26,6 +26,7 @@ set(sources
X86InstrInfo.cpp
X86JITInfo.cpp
X86MCInstLower.cpp
+ X86MachineFunctionInfo.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
@@ -51,19 +52,6 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-add_llvm_library_dependencies(LLVMX86CodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMX86AsmPrinter
- LLVMX86Desc
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 4f570d56e60f..0cd6db96dabe 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
-add_llvm_library_dependencies(LLVMX86Disassembler
- LLVMMC
- LLVMSupport
- LLVMX86Info
- )
-
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..0609f3c28de3
--- /dev/null
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Disassembler
+parent = X86
+required_libraries = MC Support X86Desc X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 3aacb20e73df..8278bde7c218 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,11 @@
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
@@ -42,6 +44,11 @@ void x86DisassemblerDebug(const char *file,
dbgs() << file << ":" << line << ": " << s;
}
+const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) {
+ const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
+ return MII->getName(Opcode);
+}
+
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
namespace llvm {
@@ -65,17 +72,19 @@ extern Target TheX86_32Target, TheX86_64Target;
}
static bool translateInstruction(MCInst &target,
- InternalInstruction &source);
+ InternalInstruction &source,
+ const MCDisassembler *Dis);
-X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
- MCDisassembler(STI),
- fMode(mode) {
-}
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
+ DisassemblerMode mode,
+ const MCInstrInfo *MII)
+ : MCDisassembler(STI), MII(MII), fMode(mode) {}
X86GenericDisassembler::~X86GenericDisassembler() {
+ delete MII;
}
-EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
return instInfoX86;
}
@@ -116,6 +125,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const {
+ CommentStream = &cStream;
+
InternalInstruction internalInstr;
dlog_t loggerFn = logger;
@@ -127,6 +138,7 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
(void*)&region,
loggerFn,
(void*)&vStream,
+ (void*)MII,
address,
fMode);
@@ -136,7 +148,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
}
else {
size = internalInstr.length;
- return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
+ return (!translateInstruction(instr, internalInstr, this)) ?
+ Success : Fail;
}
}
@@ -161,6 +174,140 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
}
+/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value - The immediate Value, has had any PC adjustment made by
+/// the caller.
+/// @param isBranch - If the instruction is a branch instruction
+/// @param Address - The starting address of the instruction
+/// @param Offset - The byte offset to this immediate in the instruction
+/// @param Width - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called then that function is called to get any symbolic information for the
+/// immediate in the instruction using the Address, Offset and Width. If that
+/// returns non-zero then the symbolic information it returns is used to create
+/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
+/// returns zero and isBranch is true then a symbol look up for immediate Value
+/// is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with the immediate Value is created. This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Width, MCInst &MI,
+ const MCDisassembler *Dis) {
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
+ return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ }
+ // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the rip.
+/// These can often be addresses in a literal pool. The Address of the
+/// instruction and its immediate Value are used to determine the address
+/// being referenced in the literal pool entry. The SymbolLookUp call back will
+/// return a pointer to a literal 'C' string if the referenced address is an
+/// address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
/// translateImmediate - Appends an immediate operand to an MCInst.
///
/// @param mcInst - The MCInst to append to.
@@ -169,10 +316,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param insn - The internal instruction.
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
// Sign-extend the immediate if necessary.
- OperandType type = operand.type;
+ OperandType type = (OperandType)operand.type;
if (type == TYPE_RELv) {
switch (insn.displacementSize) {
@@ -225,6 +373,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
}
+ bool isBranch = false;
+ uint64_t pcrel = 0;
switch (type) {
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
@@ -232,8 +382,11 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM256:
mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
return;
- case TYPE_MOFFS8:
case TYPE_REL8:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign extend the immediate if needed.
+ case TYPE_MOFFS8:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
@@ -241,9 +394,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if(immediate & 0x8000)
immediate |= ~(0xffffull);
break;
- case TYPE_MOFFS32:
case TYPE_REL32:
case TYPE_REL64:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign extend the immediate if needed.
+ case TYPE_MOFFS32:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
@@ -253,7 +409,10 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
- mcInst.addOperand(MCOperand::CreateImm(immediate));
+ if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+ insn.immediateOffset, insn.immediateSize,
+ mcInst, Dis))
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
}
/// translateRMRegister - Translates a register stored in the R/M field of the
@@ -300,7 +459,8 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
/// @return - 0 on success; nonzero otherwise
-static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
+ const MCDisassembler *Dis) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@@ -318,6 +478,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
MCOperand indexReg;
MCOperand displacement;
MCOperand segmentReg;
+ uint64_t pcrel = 0;
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
if (insn.sibBase != SIB_BASE_NONE) {
@@ -359,8 +520,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
return true;
}
- if (insn.mode == MODE_64BIT)
+ if (insn.mode == MODE_64BIT){
+ pcrel = insn.startLocation +
+ insn.displacementOffset + insn.displacementSize;
+ tryAddingPcLoadReferenceComment(insn.startLocation +
+ insn.displacementOffset,
+ insn.displacement + pcrel, Dis);
baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ }
else
baseReg = MCOperand::CreateReg(0);
@@ -426,7 +593,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
- mcInst.addOperand(displacement);
+ if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+ insn.startLocation, insn.displacementOffset,
+ insn.displacementSize, mcInst, Dis))
+ mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
}
@@ -440,7 +610,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn, const MCDisassembler *Dis) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@@ -480,7 +650,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M1632:
case TYPE_M1664:
case TYPE_LEA:
- return translateRMMemory(mcInst, insn);
+ return translateRMMemory(mcInst, insn, Dis);
}
}
@@ -510,7 +680,8 @@ static bool translateFPRegister(MCInst &mcInst,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
@@ -519,7 +690,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.reg);
return false;
case ENCODING_RM:
- return translateRM(mcInst, operand, insn);
+ return translateRM(mcInst, operand, insn, Dis);
case ENCODING_CB:
case ENCODING_CW:
case ENCODING_CD:
@@ -537,7 +708,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateImmediate(mcInst,
insn.immediates[insn.numImmediatesTranslated++],
operand,
- insn);
+ insn,
+ Dis);
return false;
case ENCODING_RB:
case ENCODING_RW:
@@ -556,7 +728,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],
- insn);
+ insn, Dis);
}
}
@@ -567,7 +739,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
if (!insn.spec) {
debug("Instruction has no specification");
return true;
@@ -581,7 +754,7 @@ static bool translateInstruction(MCInst &mcInst,
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
if (insn.spec->operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
+ if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
return true;
}
}
@@ -590,12 +763,16 @@ static bool translateInstruction(MCInst &mcInst,
return false;
}
-static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new X86Disassembler::X86_32Disassembler(STI);
+static MCDisassembler *createX86_32Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
+ T.createMCInstrInfo());
}
-static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new X86Disassembler::X86_64Disassembler(STI);
+static MCDisassembler *createX86_64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
+ T.createMCInstrInfo());
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 6ac9a0ff1019..c11f51c6a9ac 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -1,4 +1,4 @@
-//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -78,7 +78,7 @@
const char* name;
#define INSTRUCTION_IDS \
- const InstrUID *instructionIDs;
+ unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -87,11 +87,10 @@
#include "llvm/MC/MCDisassembler.h"
-struct InternalInstruction;
-
namespace llvm {
class MCInst;
+class MCInstrInfo;
class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
@@ -104,13 +103,16 @@ namespace X86Disassembler {
/// All each platform class should have to do is subclass the constructor, and
/// provide a different disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {
-protected:
+ const MCInstrInfo *MII;
+public:
/// Constructor - Initializes the disassembler.
///
/// @param mode - The X86 architecture mode to decode for.
- X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode);
-public:
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode,
+ const MCInstrInfo *MII);
+private:
~X86GenericDisassembler();
+public:
/// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr,
@@ -121,37 +123,13 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
-/// X86_16Disassembler - 16-bit X86 disassembler.
-class X86_16Disassembler : public X86GenericDisassembler {
-public:
- X86_16Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_16BIT) {
- }
-};
-
-/// X86_16Disassembler - 32-bit X86 disassembler.
-class X86_32Disassembler : public X86GenericDisassembler {
-public:
- X86_32Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_32BIT) {
- }
-};
-
-/// X86_16Disassembler - 64-bit X86 disassembler.
-class X86_64Disassembler : public X86GenericDisassembler {
-public:
- X86_64Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_64BIT) {
- }
-};
-
} // namespace X86Disassembler
-
+
} // namespace llvm
-
+
#endif
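The symbolic-operand plumbing added to the disassembler above only does something when a client has installed the opinfo and symbol-lookup callbacks; otherwise tryAddingSymbolicOperand() returns false and plain immediates are emitted as before. A minimal sketch, assuming the llvm-c/Disassembler.h interface of this vintage, of a lookup callback that lets branch targets print symbolically; the address 0x1000 and the "_main" name are invented for illustration:

    #include "llvm-c/Disassembler.h"

    // Sketch only: called by the disassembler with, for example, the computed
    // target address of a PC-relative branch. Returning a non-null name makes
    // that operand print as a symbol reference instead of a raw immediate.
    static const char *lookupSymbol(void *DisInfo, uint64_t ReferenceValue,
                                    uint64_t *ReferenceType, uint64_t ReferencePC,
                                    const char **ReferenceName) {
      *ReferenceName = 0;
      *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
      if (ReferenceValue == 0x1000)   // hypothetical address of _main
        return "_main";
      return 0;
    }

    // Installed when the context is created, alongside an optional
    // LLVMOpInfoCallback:
    //   LLVMCreateDisasm("x86_64-unknown-unknown", 0, 0, 0, lookupSymbol);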
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index f9b0fe5d51b9..602087756b23 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -82,11 +82,9 @@ static int modRMRequired(OpcodeType type,
decision = &THREEBYTEA7_SYM;
break;
}
-
+
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
-
- return 0;
}
/*
@@ -103,12 +101,9 @@ static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- const struct ModRMDecision* dec;
+ const struct ModRMDecision* dec = 0;
switch (type) {
- default:
- debug("Unknown opcode type");
- return 0;
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
@@ -134,14 +129,17 @@ static InstrUID decode(OpcodeType type,
debug("Corrupt table! Unknown modrm_type");
return 0;
case MODRM_ONEENTRY:
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs];
case MODRM_SPLITRM:
if (modFromModRM(modRM) == 0x3)
- return dec->instructionIDs[1];
- else
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs+1];
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITREG:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_FULL:
- return dec->instructionIDs[modRM];
+ return modRMTable[dec->instructionIDs+modRM];
}
}
@@ -314,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (consumeByte(insn, &byte))
return -1;
+
+ /*
+   * If the first byte is a LOCK prefix, break and let it be disassembled
+   * as a lock "instruction" by creating an <MCInst #xxxx LOCK_PREFIX>.
+   * FIXME: there is currently no way to get the disassembler to print the
+   * lock prefix if it is not the first byte.
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
switch (byte) {
case 0xf0: /* LOCK */
@@ -712,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
-static int getID(struct InternalInstruction* insn) {
+static int getID(struct InternalInstruction* insn, void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
@@ -765,6 +772,8 @@ static int getID(struct InternalInstruction* insn) {
else {
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_ADSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
@@ -773,17 +782,20 @@ static int getID(struct InternalInstruction* insn) {
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
-
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
-
+
/* The following clauses compensate for limitations of the tables. */
-
- if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
+ !(attrMask & ATTR_OPSIZE)) {
/*
* Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
* has precedence since there are no L-bit with W-bit entries in the tables.
* So if the L-bit isn't significant we should use the W-bit instead.
+ * We only need to do this if the instruction doesn't specify OpSize since
+ * there is a VEX_L_W_OPSIZE table.
*/
const struct InstructionSpecifier *spec;
@@ -823,7 +835,7 @@ static int getID(struct InternalInstruction* insn) {
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- const struct InstructionSpecifier *specWithOpsize;
+ const char *specName, *specWithOpSizeName;
spec = specifierForUID(instructionID);
@@ -840,11 +852,13 @@ static int getID(struct InternalInstruction* insn) {
return 0;
}
- specWithOpsize = specifierForUID(instructionIDWithOpsize);
-
- if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+ specName = x86DisassemblerGetInstrName(instructionID, miiArg);
+ specWithOpSizeName =
+ x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+
+ if (is16BitEquvalent(specName, specWithOpSizeName)) {
insn->instructionID = instructionIDWithOpsize;
- insn->spec = specWithOpsize;
+ insn->spec = specifierForUID(instructionIDWithOpsize);
} else {
insn->instructionID = instructionID;
insn->spec = spec;
@@ -1011,6 +1025,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
return 0;
insn->consumedDisplacement = TRUE;
+ insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
@@ -1407,6 +1422,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
size = insn->immediateSize;
else
insn->immediateSize = size;
+ insn->immediateOffset = insn->readerCursor - insn->startLocation;
switch (size) {
case 1:
@@ -1469,6 +1485,7 @@ static int readVVVV(struct InternalInstruction* insn) {
static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
+ int sawRegImm = 0;
dbgprintf(insn, "readOperands()");
@@ -1497,11 +1514,25 @@ static int readOperands(struct InternalInstruction* insn) {
dbgprintf(insn, "We currently don't hande code-offset encodings");
return -1;
case ENCODING_IB:
+ if (sawRegImm) {
+ /* Saw a register immediate so don't read again and instead split the
+ previous immediate. FIXME: This is a hack. */
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
if (readImmediate(insn, 1))
return -1;
if (insn->spec->operands[index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+ return -1;
+ if (insn->spec->operands[index].type == TYPE_XMM128 ||
+ insn->spec->operands[index].type == TYPE_XMM256)
+ sawRegImm = 1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
@@ -1593,6 +1624,7 @@ int decodeInstruction(struct InternalInstruction* insn,
void* readerArg,
dlog_t logger,
void* loggerArg,
+ void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
@@ -1608,7 +1640,7 @@ int decodeInstruction(struct InternalInstruction* insn,
if (readPrefixes(insn) ||
readOpcode(insn) ||
- getID(insn) ||
+ getID(insn, miiArg) ||
insn->instructionID == 0 ||
readOperands(insn))
return -1;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index a9c90f8f9bda..fae309b45d02 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
+/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -20,11 +20,10 @@
extern "C" {
#endif
-#define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name;
+#define INSTRUCTION_SPECIFIER_FIELDS
#define INSTRUCTION_IDS \
- const InstrUID *instructionIDs;
+ unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -460,6 +459,11 @@ struct InternalInstruction {
uint8_t addressSize;
uint8_t displacementSize;
uint8_t immediateSize;
+
+  /* Offsets from the start of the instruction to the pieces of data, which are
+     needed to find relocation entries for adding symbolic operands. */
+ uint8_t displacementOffset;
+ uint8_t immediateOffset;
/* opcode state */
@@ -554,6 +558,7 @@ int decodeInstruction(struct InternalInstruction* insn,
void* readerArg,
dlog_t logger,
void* loggerArg,
+ void* miiArg,
uint64_t startLoc,
DisassemblerMode mode);
@@ -568,6 +573,8 @@ void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
+const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 8b7933545a56..13e113609bf3 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -54,8 +54,9 @@
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
- ENUM_ENTRY(ATTR_VEX, 0x20) \
- ENUM_ENTRY(ATTR_VEXL, 0x40)
+ ENUM_ENTRY(ATTR_ADSIZE, 0x20) \
+ ENUM_ENTRY(ATTR_VEX, 0x40) \
+ ENUM_ENTRY(ATTR_VEXL, 0x80)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
@@ -77,6 +78,8 @@ enum attributeBits {
"64-bit mode but no more") \
ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
+ ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \
+ "operands change width") \
ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
@@ -88,6 +91,7 @@ enum attributeBits {
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
"secondary") \
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
@@ -111,7 +115,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
- ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
@@ -155,6 +160,8 @@ typedef uint16_t InstrUID;
* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
* corresponds to one instruction; otherwise, it corresponds to
* a different instruction.
+ * MODRM_SPLITREG - The ModR/M byte divided by 8 is used to select the
+ *                  instruction. This corresponds to instructions that use the
+ *                  reg field as an opcode extension.
* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
* to a different instruction.
*/
@@ -162,6 +169,7 @@ typedef uint16_t InstrUID;
#define MODRMTYPES \
ENUM_ENTRY(MODRM_ONEENTRY) \
ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
#define ENUM_ENTRY(n) n,
@@ -265,6 +273,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
@@ -335,8 +344,8 @@ typedef enum {
* operand.
*/
struct OperandSpecifier {
- OperandEncoding encoding;
- OperandType type;
+ uint8_t encoding;
+ uint8_t type;
};
/*
@@ -363,7 +372,7 @@ typedef enum {
* its operands.
*/
struct InstructionSpecifier {
- ModifierType modifierType;
+ uint8_t modifierType;
uint8_t modifierBase;
struct OperandSpecifier operands[X86_MAX_OPERANDS];
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 2a2b5dbb43db..28e2460d8233 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
X86InstComments.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmPrinter
- LLVMMC
- LLVMSupport
- LLVMX86Utils
- )
-
add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..6868ddefa51f
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86AsmPrinter
+parent = X86
+required_libraries = MC Support X86Utils
+add_to_library_groups = X86
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 029d491260f6..5118e4cad4e2 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -19,6 +19,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
@@ -26,14 +28,9 @@
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {
-}
-
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
OS << '%' << getRegisterName(RegNo);
@@ -45,29 +42,50 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream) {
- printAnnotation(OS, Annot);
+ if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
- }
-}
-
-StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
}
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
}
}
@@ -79,11 +97,21 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
- // Print this as a signed 32-bit value.
- O << (int)Op.getImm();
+ O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
- O << *Op.getExpr();
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
@@ -97,7 +125,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '$' << (int64_t)Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
- *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+ *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 0293869b0a9b..2e00bff1738e 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -22,11 +22,12 @@ class MCOperand;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(const MCAsmInfo &MAI);
-
+ X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -35,7 +36,6 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8d85b95fe81d..f532019acdff 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -29,11 +29,17 @@ using namespace llvm;
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
- SmallVector<unsigned, 8> ShuffleMask;
+ SmallVector<int, 8> ShuffleMask;
const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+ case X86::VINSERTPSrr:
+ DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
Src2Name = getRegName(MI->getOperand(2).getReg());
DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
@@ -44,34 +50,61 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVLHPSMask(2, ShuffleMask);
break;
+ case X86::VMOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
case X86::MOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::VMOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
case X86::PSHUFDri:
+ case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFDmi:
+ case X86::VPSHUFDmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFMask(MVT::v4i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFDYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(MVT::v8i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
+
case X86::PSHUFHWri:
+ case X86::VPSHUFHWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFHWmi:
+ case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
+ case X86::VPSHUFLWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFLWmi:
+ case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -82,28 +115,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(16, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(8, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
case X86::PUNPCKLBWrr:
@@ -111,126 +208,284 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLBWMask(16, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLWDMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLDQMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLQDQMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
case X86::SHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDrmi:
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::SHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSrmi:
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPDMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPSMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDrm:
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDYrm:
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSrm:
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSYrm:
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::VPERMILPSri:
- DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodePSHUFMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
- DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodePSHUFMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
- DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodePSHUFMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
- DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodePSHUFMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
- DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+    // For instruction comment purposes, assume the 256-bit vector is v4i64.
+ DecodeVPERM2X128Mask(MVT::v4i64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
}
@@ -245,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
if (Src1Name == Src2Name) {
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] >= (int)e) // From second mask.
ShuffleMask[i] -= e;
}
}
@@ -263,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// Otherwise, it must come from src1 or src2. Print the span of elements
// that comes from this src.
- bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
const char *SrcName = isSrc1 ? Src1Name : Src2Name;
OS << (SrcName ? SrcName : "mem") << '[';
bool IsFirst = true;
while (i != e &&
(int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
if (!IsFirst)
OS << ',';
else
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
index 6b86db4f9e5c..13fdf9af8c98 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -1,4 +1,4 @@
-//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index f9ab5aeee122..4ea662cbe0c1 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -17,15 +17,13 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
-// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -35,29 +33,52 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
printInstruction(MI, OS);
-
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream) {
- printAnnotation(OS, Annot);
+ if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
- }
-}
-StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
}
void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
+
}
}
@@ -70,7 +91,18 @@ void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
- O << *Op.getExpr();
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6d5ec6226a9e..4f5938daf4cd 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -23,17 +23,16 @@ class MCOperand;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt
new file mode 100644
index 000000000000..87305e0e5f5c
--- /dev/null
+++ b/lib/Target/X86/LLVMBuild.txt
@@ -0,0 +1,35 @@
+;===- ./lib/Target/X86/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
+[component_0]
+type = TargetGroup
+name = X86
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = X86CodeGen
+parent = X86
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils
+add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 87219120e2a8..1c240e52a37d 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -2,16 +2,10 @@ add_llvm_library(LLVMX86Desc
X86AsmBackend.cpp
X86MCTargetDesc.cpp
X86MCAsmInfo.cpp
- X86MCCodeEmitter.cpp
+ X86MCCodeEmitter.cpp
X86MachObjectWriter.cpp
- )
-
-add_llvm_library_dependencies(LLVMX86Desc
- LLVMMC
- LLVMSupport
- LLVMX86AsmPrinter
- LLVMX86AsmPrinter
- LLVMX86Info
+ X86ELFObjectWriter.cpp
+ X86WinCOFFObjectWriter.cpp
)
add_dependencies(LLVMX86Desc X86CommonTableGen)
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..9e1d29ca0a65
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Desc
+parent = X86
+required_libraries = MC Support X86AsmPrinter X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 69ad7d7b6b32..32e40febd26a 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,10 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -37,18 +36,22 @@ MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
- default: assert(0 && "invalid fixup kind!");
+ default: llvm_unreachable("invalid fixup kind!");
case FK_PCRel_1:
+ case FK_SecRel_1:
case FK_Data_1: return 0;
case FK_PCRel_2:
+ case FK_SecRel_2:
case FK_Data_2: return 1;
case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
case X86::reloc_signed_4byte:
case X86::reloc_global_offset_table:
+ case FK_SecRel_4:
case FK_Data_4: return 2;
case FK_PCRel_8:
+ case FK_SecRel_8:
case FK_Data_8: return 3;
}
}
@@ -57,9 +60,9 @@ namespace {
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
- X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend)
- : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+ X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
+ bool HasRelocationAddend, bool foobar)
+ : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
};
class X86AsmBackend : public MCAsmBackend {
@@ -87,7 +90,7 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
@@ -105,11 +108,16 @@ public:
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
};
} // end anonymous namespace
@@ -214,7 +222,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
return getRelaxedOpcodeBranch(Op);
}
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
// Branches can always be relaxed.
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
@@ -244,9 +252,17 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExp && !hasRIP;
}
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
@@ -262,10 +278,10 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.setOpcode(RelaxedOp);
}
-/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// writeNopData - Write optimal nops to the output file for the \arg Count
/// bytes. This returns the number of bytes written. It may return 0 if
/// the \arg Count is more than the maximum optimal nops.
-bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
{0x90},
@@ -310,9 +326,9 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
- Triple::OSType OSType;
- ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
- : X86AsmBackend(T), OSType(_OSType) {
+ uint8_t OSABI;
+ ELFX86AsmBackend(const Target &T, uint8_t _OSABI)
+ : X86AsmBackend(T), OSABI(_OSABI) {
HasReliableSymbolDifference = true;
}
@@ -324,31 +340,21 @@ public:
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
- : ELFX86AsmBackend(T, OSType) {}
+ ELFX86_32AsmBackend(const Target &T, uint8_t OSABI)
+ : ELFX86AsmBackend(T, OSABI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI);
}
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
- : ELFX86AsmBackend(T, OSType) {}
+ ELFX86_64AsmBackend(const Target &T, uint8_t OSABI)
+ : ELFX86AsmBackend(T, OSABI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI);
}
};
@@ -362,7 +368,7 @@ public:
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createWinCOFFObjectWriter(OS, Is64Bit);
+ return createX86WinCOFFObjectWriter(OS, Is64Bit);
}
};
@@ -442,7 +448,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
return new WindowsX86AsmBackend(T, false);
- return new ELFX86_32AsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_32AsmBackend(T, OSABI);
}
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
@@ -454,5 +461,6 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
return new WindowsX86AsmBackend(T, true);
- return new ELFX86_64AsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_64AsmBackend(T, OSABI);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index e6ba705d4d87..a0bb6dc6d60f 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -19,7 +19,7 @@
#include "X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include <cassert>
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -164,7 +164,13 @@ namespace X86II {
/// is some TLS offset from the picbase.
///
/// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
- MO_TLVP_PIC_BASE
+ MO_TLVP_PIC_BASE,
+
+ /// MO_SECREL - On a symbol operand this indicates that the immediate is
+ /// the offset from beginning of section.
+ ///
+ /// This is the TLS offset for the COFF/Windows TLS mechanism.
+ MO_SECREL
};
enum {
@@ -223,19 +229,13 @@ namespace X86II {
// destinations are the same register.
MRMInitReg = 32,
- //// MRM_C1 - A mod/rm byte of exactly 0xC1.
- MRM_C1 = 33,
- MRM_C2 = 34,
- MRM_C3 = 35,
- MRM_C4 = 36,
- MRM_C8 = 37,
- MRM_C9 = 38,
- MRM_E8 = 39,
- MRM_F0 = 40,
- MRM_F8 = 41,
- MRM_F9 = 42,
- MRM_D0 = 45,
- MRM_D1 = 46,
+ //// MRM_XX - A mod/rm byte of exactly 0xXX.
+ MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
+ MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
+ MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
+ MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50,
+ MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54,
+ MRM_DF = 55,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -295,8 +295,20 @@ namespace X86II {
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
- // TF - Prefix before and after 0x0F
- TF = 17 << Op0Shift,
+ // T8XD - Prefix before and after 0x0F. Combination of T8 and XD.
+ T8XD = 17 << Op0Shift,
+
+ // T8XS - Prefix before and after 0x0F. Combination of T8 and XS.
+ T8XS = 18 << Op0Shift,
+
+ // TAXD - Prefix before and after 0x0F. Combination of TA and XD.
+ TAXD = 19 << Op0Shift,
+
+ // XOP8 - Prefix to include use of imm byte.
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 21 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -387,20 +399,24 @@ namespace X86II {
/// and the additional register is encoded in VEX_VVVV prefix.
VEX_4V = 1U << 2,
+ /// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
+ /// operand 3 with VEX.vvvv.
+ VEX_4VOp3 = 1U << 3,
+
/// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
/// must be encoded in the i8 immediate field. This usually happens in
/// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
+ VEX_I8IMM = 1U << 4,
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
/// instruction uses 256-bit wide registers. This is usually auto detected
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
- VEX_L = 1U << 4,
+ VEX_L = 1U << 5,
// VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
// prefix. Usually used for scalar instructions. Needed by disassembler.
- VEX_LIG = 1U << 5,
+ VEX_LIG = 1U << 6,
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
@@ -408,7 +424,15 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 6
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
+ /// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
+ MemOp4 = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -426,7 +450,7 @@ namespace X86II {
/// of the specified instruction.
static inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
+ default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
case X86II::Imm8PCRel: return 1;
case X86II::Imm16:
@@ -441,7 +465,7 @@ namespace X86II {
/// TSFlags indicates that it is pc relative.
static inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
+ default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
case X86II::Imm16PCRel:
case X86II::Imm32PCRel:
@@ -462,10 +486,10 @@ namespace X86II {
/// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
/// counted as one operand.
///
- static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
- default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this form");
+ default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
case X86II::RawFrm:
case X86II::AddRegFrm:
@@ -478,9 +502,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasMemOp4)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
@@ -495,20 +522,24 @@ namespace X86II {
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- return 0;
- case X86II::MRM_C1:
- case X86II::MRM_C2:
- case X86II::MRM_C3:
- case X86II::MRM_C4:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- case X86II::MRM_F8:
- case X86II::MRM_F9:
- case X86II::MRM_D0:
- case X86II::MRM_D1:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 0;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
+ return FirstMemOp;
+ }
+ case X86II::MRM_C1: case X86II::MRM_C2:
+ case X86II::MRM_C3: case X86II::MRM_C4:
+ case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_F8: case X86II::MRM_F9:
+ case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF:
return -1;
}
}
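For reference, a minimal sketch of how the new VEX-group flag bits introduced above are queried; X86II::VEXShift and the flag values come from the enum in this header, while the helper names themselves are illustrative and not part of the patch.

  static inline bool hasMemOp4(uint64_t TSFlags) {
    // MemOp4 lives above VEXShift, so shift before masking.
    return (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
  }
  static inline bool hasVEX_4VOp3(uint64_t TSFlags) {
    return (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
  }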
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
new file mode 100644
index 000000000000..5a42a801825d
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -0,0 +1,224 @@
+//===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ X86ELFObjectWriter(bool is64Bit, uint8_t OSABI);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_X86_64 : ELF::EM_386,
+ /*HasRelocationAddend*/ Is64Bit) {}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (is64Bit()) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+ case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+ case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ case FK_PCRel_1:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC8;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_386_TLS_IE_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW =
+ new X86ELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
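A hedged usage sketch of the new entry point: an assembler backend could construct the writer roughly as below. The surrounding function and the ELFOSABI_NONE default are illustrative assumptions, not taken from the patch.

  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
    // 64-bit ELF with a generic OSABI; real callers derive the OSABI from the triple.
    return createX86ELFObjectWriter(OS, /*Is64Bit=*/true, ELF::ELFOSABI_NONE);
  }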
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 17d242ab761e..f2e34cbe0d65 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -1,4 +1,4 @@
-//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 27031005bd09..afa545cbb314 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -48,6 +48,8 @@ static const char *const x86_asm_table[] = {
"{cc}", "cc",
0,0};
+void X86MCAsmInfoDarwin::anchor() { }
+
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
@@ -80,6 +82,8 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
: X86MCAsmInfoDarwin(Triple) {
}
+void X86ELFMCAsmInfo::anchor() { }
+
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
if (T.getArch() == Triple::x86_64)
PointerSize = 8;
@@ -125,7 +129,23 @@ getNonexecutableStackSection(MCContext &Ctx) const {
0, SectionKind::getMetadata());
}
-X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+void X86MCAsmInfoMicrosoft::anchor() { }
+
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+void X86MCAsmInfoGNUCOFF::anchor() { }
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
@@ -135,4 +155,7 @@ X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ // Exception handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8eb30ec..b6b70fd3e855 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,7 +21,9 @@
namespace llvm {
class Triple;
- struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
@@ -33,13 +35,23 @@ namespace llvm {
MCStreamer &Streamer) const;
};
- struct X86ELFMCAsmInfo : public MCAsmInfo {
+ class X86ELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
- struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
- explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
};
} // namespace llvm
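The anchor() declarations added above follow the usual key-function idiom: one out-of-line virtual member per class pins the vtable to a single translation unit instead of emitting a weak copy in every file that includes the header. A minimal sketch of the pattern, with illustrative names:

  // In the header:
  class ExampleAsmInfo : public MCAsmInfo {
    virtual void anchor();
  public:
    explicit ExampleAsmInfo();
  };

  // In exactly one .cpp file:
  void ExampleAsmInfo::anchor() { }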
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 2eee1128119e..80990e5822bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -46,6 +46,11 @@ public:
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ bool is32BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86_MC::getX86RegNum(MO.getReg());
}
@@ -63,9 +68,8 @@ public:
unsigned OpNum) {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
- if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
- (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
- SrcRegNum += 8;
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
// The registers represented through VEX_VVVV should
// be encoded in 1's complement form.
@@ -86,7 +90,7 @@ public:
}
}
- void EmitImmediate(const MCOperand &Disp,
+ void EmitImmediate(const MCOperand &Disp, SMLoc Loc,
unsigned ImmSize, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -155,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// Is32BitMemOperand - Return true if the specified instruction with a memory
-/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
-/// memory operand. Op specifies the operand # of the memoperand.
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -170,28 +173,71 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
-/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
-/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple cases that
/// are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
/// of a binary expression.
-static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
+ RHS = BE->getRHS();
}
if (Expr->getKind() != MCExpr::SymbolRef)
- return false;
+ return GOT_None;
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const MCSymbol &S = Ref->getSymbol();
- return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
}
void X86MCCodeEmitter::
-EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
- unsigned &CurByte, raw_ostream &OS,
+EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
@@ -210,12 +256,21 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
if ((FixupKind == FK_Data_4 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
- StartsWithGlobalOffsetTable(Expr)) {
- assert(ImmOffset == 0);
-
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
- ImmOffset = CurByte;
+ FixupKind == FK_Data_8 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ } else if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) {
+ FixupKind = MCFixupKind(FK_SecRel_4);
+ }
+ }
}
// If the fixup is pc-relative, we need to bias the value to be relative to
@@ -234,7 +289,7 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind, Loc));
EmitConstant(0, Size, CurByte, OS);
}
@@ -270,7 +325,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// expression to emit.
int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
- EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
CurByte, OS, Fixups, -ImmSize);
return;
}
@@ -294,7 +349,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
if (BaseReg == 0) { // [disp32] in X86-32 mode
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
@@ -310,13 +365,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm() && isDisp8(Disp.getImm())) {
EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
Fixups);
return;
}
@@ -375,10 +430,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Do we need to output a displacement?
if (ForceDisp8)
- EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
- Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ CurByte, OS, Fixups);
}
/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
@@ -387,9 +442,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
raw_ostream &OS) const {
- bool HasVEX_4V = false;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
- HasVEX_4V = true;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -417,6 +471,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -424,7 +481,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
- //
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -455,27 +513,44 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case X86II::T8: // 0F 38
VEX_5M = 0x2;
break;
case X86II::TA: // 0F 3A
VEX_5M = 0x3;
break;
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
VEX_PP = 0x3;
VEX_5M = 0x2;
break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
case X86II::XS: // F3 0F
VEX_PP = 0x2;
break;
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
case X86II::A6: // Bypass: Not used by VEX
case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
@@ -483,6 +558,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
+
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
@@ -495,7 +571,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned CurOp = 0;
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
case X86II::MRMDestMem: {
// MRMDestMem instruction forms:
// MemAddr, src1(ModR/M)
@@ -516,41 +592,50 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
break;
}
- case X86II::MRMSrcMem: {
+ case X86II::MRMSrcMem:
// MRMSrcMem instruction forms:
// src1(ModR/M), MemAddr
// src1(ModR/M), src2(VEX_4V), MemAddr
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
- unsigned MemAddrOffset = 1;
- if (HasVEX_4V) {
+ if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, 1);
- MemAddrOffset++;
- }
if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg()))
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg()))
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
break;
- }
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRM6m: case X86II::MRM7m: {
// MRM[0-9]m instruction forms:
// MemAddr
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
VEX_X = 0x0;
break;
+ }
case X86II::MRMSrcReg:
// MRMSrcReg instruction forms:
// dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
@@ -565,6 +650,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
break;
case X86II::MRMDestReg:
// MRMDestReg instruction forms:
@@ -605,14 +693,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(0xC4, CurByte, OS);
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
}
@@ -647,7 +735,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
@@ -717,12 +805,12 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
const MCInst &MI,
raw_ostream &OS) const {
switch (TSFlags & X86II::SegOvrMask) {
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0:
// No segment override, check for explicit one on memory operand.
if (MemOperand != -1) { // If the instruction has a memory operand.
switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
- default: assert(0 && "Unknown segment register!");
+ default: llvm_unreachable("Unknown segment register!");
case 0: break;
case X86::CS: EmitByte(0x2E, CurByte, OS); break;
case X86::SS: EmitByte(0x36, CurByte, OS); break;
@@ -763,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EmitByte(0xF3, CurByte, OS);
// Emit the address size opcode prefix as needed.
- if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (is64BitMode()) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else if (is32BitMode()) {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ } else {
+ need_address_override = false;
+ }
+
+ if (need_address_override)
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
@@ -773,7 +875,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
case X86II::REP: break; // already handled.
case X86II::TB: // Two-byte opcode prefix
@@ -783,7 +885,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::TAXD: // F2 0F 3A
EmitByte(0xF2, CurByte, OS);
Need0FPrefix = true;
break;
@@ -818,10 +928,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// FIXME: Pull this up into previous switch if REX can be moved earlier.
switch (TSFlags & X86II::Op0Mask) {
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8XD: // F2 0F 38
case X86II::T8: // 0F 38
EmitByte(0x38, CurByte, OS);
break;
+ case X86II::TAXD: // F2 0F 3A
case X86II::TA: // 0F 3A
EmitByte(0x3A, CurByte, OS);
break;
@@ -859,18 +971,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned CurByte = 0;
// Is this instruction encoded using the AVX VEX prefix?
- bool HasVEXPrefix = false;
+ bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
// It uses the VEX.VVVV field?
- bool HasVEX_4V = false;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
- HasVEXPrefix = true;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
- HasVEX_4V = true;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ const unsigned MemOp4_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
- int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
if (MemoryOperand != -1) MemoryOperand += CurOp;
if (!HasVEXPrefix)
@@ -886,27 +996,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
- assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
- assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!");
case X86II::Pseudo:
- assert(0 && "Pseudo instruction shouldn't be emitted");
+ llvm_unreachable("Pseudo instruction shouldn't be emitted");
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
case X86II::RawFrmImm8:
EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ OS, Fixups);
break;
case X86II::RawFrmImm16:
EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ OS, Fixups);
break;
case X86II::AddRegFrm:
@@ -940,9 +1052,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ SrcRegNum++;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- CurOp = SrcRegNum + 1;
+
+ // 2 operands skipped with HasMemOp4, compensate accordingly
+ CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
case X86II::MRMSrcMem: {
@@ -952,12 +1071,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if(HasMemOp4) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
}
@@ -976,58 +1099,52 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C1:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC1, CurByte, OS);
- break;
- case X86II::MRM_C2:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC2, CurByte, OS);
- break;
- case X86II::MRM_C3:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC3, CurByte, OS);
- break;
- case X86II::MRM_C4:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC4, CurByte, OS);
- break;
- case X86II::MRM_C8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC8, CurByte, OS);
- break;
- case X86II::MRM_C9:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC9, CurByte, OS);
- break;
- case X86II::MRM_E8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xE8, CurByte, OS);
- break;
- case X86II::MRM_F0:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF0, CurByte, OS);
- break;
- case X86II::MRM_F8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF8, CurByte, OS);
- break;
+ case X86II::MRM_C1: case X86II::MRM_C2:
+ case X86II::MRM_C3: case X86II::MRM_C4:
+ case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8:
+ case X86II::MRM_F0: case X86II::MRM_F8:
case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF9, CurByte, OS);
- break;
- case X86II::MRM_D0:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xD0, CurByte, OS);
- break;
- case X86II::MRM_D1:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xD1, CurByte, OS);
+
+ unsigned char MRM;
+ switch (TSFlags & X86II::FormMask) {
+ default: llvm_unreachable("Invalid Form");
+ case X86II::MRM_C1: MRM = 0xC1; break;
+ case X86II::MRM_C2: MRM = 0xC2; break;
+ case X86II::MRM_C3: MRM = 0xC3; break;
+ case X86II::MRM_C4: MRM = 0xC4; break;
+ case X86II::MRM_C8: MRM = 0xC8; break;
+ case X86II::MRM_C9: MRM = 0xC9; break;
+ case X86II::MRM_D0: MRM = 0xD0; break;
+ case X86II::MRM_D1: MRM = 0xD1; break;
+ case X86II::MRM_D4: MRM = 0xD4; break;
+ case X86II::MRM_D8: MRM = 0xD8; break;
+ case X86II::MRM_D9: MRM = 0xD9; break;
+ case X86II::MRM_DA: MRM = 0xDA; break;
+ case X86II::MRM_DB: MRM = 0xDB; break;
+ case X86II::MRM_DC: MRM = 0xDC; break;
+ case X86II::MRM_DD: MRM = 0xDD; break;
+ case X86II::MRM_DE: MRM = 0xDE; break;
+ case X86II::MRM_DF: MRM = 0xDF; break;
+ case X86II::MRM_E8: MRM = 0xE8; break;
+ case X86II::MRM_F0: MRM = 0xF0; break;
+ case X86II::MRM_F8: MRM = 0xF8; break;
+ case X86II::MRM_F9: MRM = 0xF9; break;
+ }
+ EmitByte(MRM, CurByte, OS);
break;
}
@@ -1035,14 +1152,26 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// according to the right size for the instruction.
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
- // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ // in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
- EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
- Fixups);
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0]
+ if(CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if(MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
+ EmitImmediate(MCOperand::CreateImm(RegNum), MI.getLoc(), 1, FK_Data_1,
+ CurByte, OS, Fixups);
} else {
unsigned FixupKind;
// FIXME: Is there a better way to know that we need a signed relocation?
@@ -1053,7 +1182,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
FixupKind = X86::reloc_signed_4byte;
else
FixupKind = getImmFixupKind(TSFlags);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
CurByte, OS, Fixups);
}
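The need_address_override logic added above reduces to a small decision table. A hedged standalone restatement, illustrative only and not part of the emitter:

  static bool needsAddressSizeOverride(bool HasAdSizeFlag, bool HasMemOperand,
                                       bool In64BitMode, bool In32BitMode,
                                       bool MemIs32Bit, bool MemIs16Bit) {
    if (HasAdSizeFlag) return true;     // instruction explicitly asks for 0x67
    if (!HasMemOperand) return false;   // nothing to override
    if (In64BitMode) return MemIs32Bit; // 64-bit code addressing 32-bit memory
    if (In32BitMode) return MemIs16Bit; // 32-bit code addressing 16-bit memory
    return false;                       // other modes are not handled here
  }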
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f98d5e331fef..348236316c89 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
@@ -35,6 +36,10 @@
#define GET_SUBTARGETINFO_MC_DESC
#include "X86GenSubtargetInfo.inc"
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
using namespace llvm;
@@ -45,10 +50,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
FS = "+64bit-mode";
else
FS = "-64bit-mode";
- if (TheTriple.getOS() == Triple::NativeClient)
- FS += ",+nacl-mode";
- else
- FS += ",-nacl-mode";
return FS;
}
@@ -76,6 +77,8 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
*rECX = registers[2];
*rEDX = registers[3];
return false;
+ #else
+ return true;
#endif
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
@@ -102,9 +105,81 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
mov dword ptr [esi],edx
}
return false;
+ #else
+ return true;
#endif
+#else
+ return true;
#endif
+}
+
+/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
+/// 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ // __cpuidex was added in MSVC++ 9.0 SP1
+ #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
+ #else
+ return true;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ mov ecx,subleaf
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #else
+ return true;
+ #endif
+#else
return true;
+#endif
}
void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
@@ -261,7 +336,8 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
std::string CPUName = CPU;
if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
CPUName = sys::getHostCPUName();
#else
CPUName = "generic";
@@ -303,8 +379,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
- } else if (TheTriple.isOSWindows()) {
- MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
MAI = new X86ELFMCAsmInfo(TheTriple);
}
@@ -327,7 +405,8 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
Triple T(TT);
@@ -371,7 +450,7 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
// 64-bit JIT places everything in the same buffer except external funcs.
CM = is64Bit ? CodeModel::Large : CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -395,11 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
+ return new X86IntelInstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index c144c513de15..9896cbe53632 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -54,6 +54,11 @@ namespace X86_MC {
/// the specified arguments. If we can't run cpuid on the host, return true.
bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+ /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+ /// the 4 values in the specified arguments. If we can't run cpuid on the
+ /// host, return true.
+ bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
@@ -83,6 +88,12 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI);
+/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
} // End llvm namespace
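A hedged usage sketch of the new GetCpuIDAndInfoEx helper declared above: it returns true when cpuid cannot be executed on the host, so success is the negated result. Which EBX bit maps to which feature is deliberately left unspecified here.

  unsigned EAX, EBX, ECX, EDX;
  if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
    // Leaf 7 / subleaf 0 succeeded; inspect EBX for extended feature bits.
  }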
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..bc272efcc9ce
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace llvm {
+ class MCObjectWriter;
+}
+
+namespace {
+ class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+ const bool Is64Bit;
+
+ public:
+ X86WinCOFFObjectWriter(bool Is64Bit_);
+ ~X86WinCOFFObjectWriter();
+
+ virtual unsigned getRelocType(unsigned FixupKind) const;
+ };
+}
+
+X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
+ : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 :
+ COFF::IMAGE_FILE_MACHINE_I386),
+ Is64Bit(Is64Bit_) {}
+
+X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
+
+unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
+ case FK_Data_8:
+ if (Is64Bit)
+ return COFF::IMAGE_REL_AMD64_ADDR64;
+ llvm_unreachable("unsupported relocation type");
+ case FK_SecRel_4:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+}
+
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+ bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7d901afae474..624e56fa0f64 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -923,15 +923,21 @@ The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
-If SSE4.1 is available we should inline rounding functions instead of emitting
-a libcall.
+[UNSAFE FP]
-floor: roundsd $0x01, %xmm, %xmm
-ceil: roundsd $0x02, %xmm, %xmm
+void foo(double, double, double);
+void norm(double x, double y, double z) {
+ double scale = __builtin_sqrt(x*x + y*y + z*z);
+ foo(x/scale, y/scale, z/scale);
+}
-and likewise for the single precision versions.
+We currently generate an sqrtsd and 3 divsd instructions. This is bad; fp div is
+slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first
+and emit 3 mulsd in place of the divs. This can be done as a target-independent
+transform.
-Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
-corresponding nodes and some targets (including X86) aren't ready for them.
+If we're dealing with floats instead of doubles we could even replace the sqrtss
+and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
+cost of reduced accuracy.
//===---------------------------------------------------------------------===//
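A source-level sketch of the transform described in the note above, valid only under -ffast-math style reassociation; foo is the declaration from that note:

  void foo(double, double, double);
  void norm_fast(double x, double y, double z) {
    double scale = __builtin_sqrt(x*x + y*y + z*z);
    double inv = 1.0 / scale;        // one divsd ...
    foo(x*inv, y*inv, z*inv);        // ... then three mulsd
  }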
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index b40795506071..6a8a4fdf2520 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -56,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces.
Some isel ideas:
-1. Dynamic programming based approach when compile time if not an
+1. Dynamic programming based approach when compile time is not an
issue.
2. Code duplication (addressing mode) during isel.
3. Other ideas from "Register-Sensitive Selection, Duplication, and
@@ -2061,34 +2061,20 @@ The trick is to match "fetch_and_add(X, -C) == C".
//===---------------------------------------------------------------------===//
-unsigned log2(unsigned x) {
- return x > 1 ? 32-__builtin_clz(x-1) : 0;
+unsigned t(unsigned a, unsigned b) {
+ return a <= b ? 5 : -5;
}
-generates (x86_64):
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- movl $63, %ecx
- bsrl %edi, %eax
- cmovel %ecx, %eax
- xorl $-32, %eax
- addl $33, %eax
-LBB0_2:
- ret
-
-The cmov and the early test are redundant:
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- bsrl %edi, %eax
- xorl $-32, %eax
- addl $33, %eax
-LBB0_2:
- ret
+We generate:
+ movl $5, %ecx
+ cmpl %esi, %edi
+ movl $-5, %eax
+ cmovbel %ecx, %eax
+
+GCC:
+ cmpl %edi, %esi
+ sbbl %eax, %eax
+ andl $-10, %eax
+ addl $5, %eax
//===---------------------------------------------------------------------===//
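A C-level restatement of GCC's branch-free sequence quoted above (illustrative): the comparison is turned into an all-zeros or all-ones mask, then scaled and offset.

  unsigned t_branchless(unsigned a, unsigned b) {
    unsigned mask = 0u - (a > b);        // 0 when a <= b, 0xFFFFFFFF otherwise
    return (mask & (unsigned)-10) + 5;   // 5 when a <= b, (unsigned)-5 otherwise
  }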
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 4da00fa44f4d..b1d0b9f9f9bd 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMX86Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..3c64a2255302
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Info
+parent = X86
+required_libraries = MC Support Target
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index caffd8b37816..2e72c344d99c 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_llvm_library_dependencies(LLVMX86Utils
- LLVMCore
- LLVMSupport
- )
-
add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..de0a30fa19c8
--- /dev/null
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/Utils/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Utils
+parent = X86
+required_libraries = Core Support
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index aeb3309d09aa..32c722acc437 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -20,7 +20,7 @@
namespace llvm {
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// Defaults to copying the dest value.
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
@@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -63,16 +61,26 @@ void DecodeMOVLHPSMask(unsigned NElts,
ShuffleMask.push_back(NElts+i);
}
-void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ int NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -83,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm,
}
}
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -95,76 +102,35 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(VT, ShuffleMask);
-}
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
- }
-}
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2); // Reads from dest
- ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ int NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
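As a sanity check of the SHUFP decode, here is a small self-contained sketch for a 128-bit v4f32 and immediate 0x1B (shufps $0x1b): the first two selectors read from the destination, the last two from the source, offset by NumElts. It follows the loop above rather than the in-tree API.

#include <cassert>
#include <vector>

int main() {
  unsigned NumElts = 4, NumLaneElts = 4, Imm = 0x1B; // single 128-bit lane
  std::vector<int> Mask;
  int NewImm = Imm;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) { // reads from dest
      Mask.push_back(NewImm % NumLaneElts + l);
      NewImm /= NumLaneElts;
    }
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) { // reads from src
      Mask.push_back(NewImm % NumLaneElts + NumElts + l);
      NewImm /= NumLaneElts;
    }
    if (NumLaneElts == 4) NewImm = Imm;
  }
  assert((Mask == std::vector<int>{3, 2, 5, 4}));
  return 0;
}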
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -173,55 +139,36 @@ void DecodeUNPCKLPMask(EVT VT,
if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts / 2;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = Start; i != End; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
}
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*32)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- unsigned Idx = (Imm >> (i*2)) & 0x3 ;
- ShuffleMask.push_back(Idx+(l*LaneSize));
- }
- }
-}
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*64)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l < NumLanes; ++l) {
- for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- unsigned Idx = (Imm >> i) & 0x1;
- ShuffleMask.push_back(Idx+(l*LaneSize));
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
}
}
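The two unpack decoders differ only in which half of each 128-bit lane they interleave. A standalone sketch of the expected masks for a 256-bit v8f32 (element and lane counts assumed), following the loops above:

#include <cassert>
#include <vector>

int main() {
  unsigned NumElts = 8, NumLaneElts = 4; // v8f32: two 128-bit lanes of 4 floats
  std::vector<int> Lo, Hi;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // UNPCKL: interleave the low half of each lane of src1 with src2.
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      Lo.push_back(i);           // reads from dest/src1
      Lo.push_back(i + NumElts); // reads from src/src2
    }
    // UNPCKH: same, but starting at the high half of the lane.
    for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
      Hi.push_back(i);
      Hi.push_back(i + NumElts);
    }
  }
  assert((Lo == std::vector<int>{0, 8, 1, 9, 4, 12, 5, 13}));
  assert((Hi == std::vector<int>{2, 10, 3, 11, 6, 14, 7, 15}));
  return 0;
}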
-void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
@@ -232,12 +179,4 @@ void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
ShuffleMask.push_back(i);
}
-void DecodeVPERM2F128Mask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
- // has information about the instruction and not the types. So for
- // instruction comments purpose, assume the 256-bit vector is v4i64.
- return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
-}
-
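A quick worked example of the VPERM2X128 decode for v4i64 with immediate 0x21, assuming the second destination half is filled from SndHalfBegin the same way the first is filled from FstHalfBegin (only the first loop is visible in this hunk):

#include <cassert>
#include <vector>

int main() {
  unsigned NumElts = 4, Imm = 0x21; // v4i64, vperm2f128 $0x21
  unsigned HalfSize = NumElts / 2;
  unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;        // low nibble picks dest's low half
  unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; // high nibble picks dest's high half
  std::vector<int> Mask;
  for (unsigned i = FstHalfBegin, e = FstHalfBegin + HalfSize; i != e; ++i)
    Mask.push_back(i);
  for (unsigned i = SndHalfBegin, e = SndHalfBegin + HalfSize; i != e; ++i)
    Mask.push_back(i);
  assert((Mask == std::vector<int>{2, 3, 4, 5})); // src1 high half, then src2 low half
  return 0;
}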
} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 58193e6a4688..5b8c6ef62e29 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -24,83 +24,41 @@
namespace llvm {
enum {
- SM_SentinelZero = ~0U
+ SM_SentinelZero = -1
};
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeVPERM2F128Mask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 81e94227fca6..ecc7b59c6fa0 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,8 +24,6 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
-class MachineCodeEmitter;
-class Target;
class X86TargetMachine;
/// createX86ISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 104b91fd3534..b6591d441969 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,4 +1,4 @@
-//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
- "Native Client mode">;
-
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
@@ -58,7 +55,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41, FeaturePOPCNT]>;
+ [FeatureSSE41]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
@@ -81,16 +78,24 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
- [FeaturePOPCNT]>;
+ [FeatureSSE3]>;
-def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
- "Enable AVX instructions">;
+def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
+ "Enable AVX instructions",
+ [FeatureSSE42]>;
+def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
+ "Enable AVX2 instructions",
+ [FeatureAVX]>;
def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
"Enable carry-less multiplication instructions">;
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
- "Enable three-operand fused multiple-add">;
+ "Enable three-operand fused multiple-add",
+ [FeatureAVX]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
- "Enable four-operand fused multiple-add">;
+ "Enable four-operand fused multiple-add",
+ [FeatureAVX]>;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions">;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -102,17 +107,31 @@ def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions">;
+def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
+ "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
+def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
+ "Support BMI2 instructions">;
+def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
+include "X86Schedule.td"
+
+def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
+ "Intel Atom processors">;
+
class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+ : Processor<Name, GenericItineraries, Features>;
+
+class AtomProc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, AtomItineraries, Features>;
def : Proc<"generic", []>;
def : Proc<"i386", []>;
@@ -137,34 +156,38 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
- FeatureSlowBTMem]>;
+def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES]>;
def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem, FeatureAES,
- FeatureCLMUL]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeatureCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B,
+def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeatureCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B,
+def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeatureCLMUL,
- FeatureRDRAND, FeatureF16C]>;
+ FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
- FeatureCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
- FeatureBMI]>;
+// FIXME: Disabling AVX/AVX2/FMA3 for now since it's not ready.
+def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
+ FeatureAES, FeatureCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase,
+ FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+ FeatureBMI2]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -189,15 +212,21 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSSE4A, Feature3DNowA]>;
-def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A,
- Feature3DNowA]>;
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowBTMem]>;
+// Bobcat
+def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureLZCNT, FeaturePOPCNT]>;
+// FIXME: Disabling AVX/FMA4 for now since it's not ready.
+// Bulldozer
+def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureXOP, FeatureLZCNT, FeaturePOPCNT]>;
+// Enhanced Bulldozer
+def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureXOP, FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
@@ -229,9 +258,11 @@ include "X86CallingConv.td"
// Assembly Parser
//===----------------------------------------------------------------------===//
-// Currently the X86 assembly parser only supports ATT syntax.
def ATTAsmParser : AsmParser {
- string AsmParserClassName = "ATTAsmParser";
+ string AsmParserClassName = "AsmParser";
+}
+
+def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Discard comments in assembly strings.
@@ -241,6 +272,16 @@ def ATTAsmParser : AsmParser {
string RegisterPrefix = "%";
}
+def IntelAsmParserVariant : AsmParserVariant {
+ int Variant = 1;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = ";";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "";
+}
+
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
@@ -261,8 +302,7 @@ def IntelAsmWriter : AsmWriter {
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
-
let AssemblyParsers = [ATTAsmParser];
-
+ let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 4c3ff02826b0..7db7ccbedcfe 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -199,6 +198,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
+ case X86II::MO_SECREL: O << "@SECREL"; break;
}
}
@@ -264,16 +264,40 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
raw_ostream &O) {
unsigned char value = MI->getOperand(Op).getImm();
- assert(value <= 7 && "Invalid ssecc argument!");
switch (value) {
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
}
}
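The extended table simply maps the compare immediate to its mnemonic suffix: values 0-7 are the legacy SSE predicates, while 8-0x1f are only encodable with VEX-prefixed (AVX) compares. A minimal standalone sketch of that mapping, mirroring the switch above:

#include <cassert>
#include <cstring>

static const char *SSECCNames[32] = {
  "eq",    "lt",     "le",     "unord",    "neq",    "nlt",    "nle",    "ord",
  "eq_uq", "nge",    "ngt",    "false",    "neq_oq", "ge",     "gt",     "true",
  "eq_os", "lt_oq",  "le_oq",  "unord_s",  "neq_us", "nlt_uq", "nle_uq", "ord_s",
  "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq",  "gt_oq",  "true_us"
};

int main() {
  assert(std::strcmp(SSECCNames[0x03], "unord") == 0); // legacy: cmpunordps
  assert(std::strcmp(SSECCNames[0x0d], "ge") == 0);    // AVX-only: vcmpgeps
  return 0;
}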
@@ -575,7 +599,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
- MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ MMI->usesVAFloatArgument()) {
StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 3a50435d38ba..a6ed9ba0060d 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -17,7 +17,6 @@
#include "X86.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -25,11 +24,7 @@
namespace llvm {
-class MachineJumpTableInfo;
-class MCContext;
-class MCInst;
class MCStreamer;
-class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index 4326814a7a96..e01ff41ed198 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 98ab2a66a17f..0cec95a57abc 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#ifndef X86COFF_MACHINEMODULEINFO_H
#define X86COFF_MACHINEMODULEINFO_H
+#include "X86MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 77b99056ae00..d148989e97f9 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1,10 +1,10 @@
-//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
-//
+//===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the X86-32 and X86-64
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
- CCIfInReg<CCIfSubtarget<"hasXMMInt()",
+ CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
- CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
- CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@@ -150,18 +150,23 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX vector arguments are passed in XMM registers on Darwin.
CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasXMM()",
+ CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- // The first 8 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
@@ -193,6 +198,10 @@ def CC_X86_Win64_C : CallingConv<[
// 128 bit vectors are passed by pointer
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // 256 bit vectors are passed by pointer
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
+
// The first 4 MMX vector arguments are passed in GPRs.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
@@ -233,7 +242,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasXMM()",
+ CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
@@ -251,7 +260,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 vector arguments are passed in mmx registers if the
@@ -322,8 +331,8 @@ def CC_X86_32_ThisCall : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- // The 'nest' parameter, if any, is passed in EAX.
- CCIfNest<CCAssignToReg<[EAX]>>,
+ // Pass sret arguments indirectly through EAX
+ CCIfSRet<CCAssignToReg<[EAX]>>,
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -350,7 +359,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned.
@@ -399,3 +408,18 @@ def CC_X86 : CallingConv<[
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
CCDelegateTo<CC_X86_32>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved Registers.
+//===----------------------------------------------------------------------===//
+
+def CSR_Ghc : CalleeSavedRegs<(add)>;
+
+def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
+def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
+
+def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>;
+def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
+
+def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
+ (sequence "XMM%u", 6, 15))>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f939510fa06b..ee3de9a3f6fe 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -664,15 +664,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
- case X86II::TF: // F2 0F 38
- MCE.emitByte(0xF2);
- Need0FPrefix = true;
- break;
case X86II::REP: break; // already handled.
+ case X86II::T8XS: // F3 0F 38
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
Need0FPrefix = true;
break;
+ case X86II::T8XD: // F2 0F 38
+ case X86II::TAXD: // F2 0F 3A
case X86II::XD: // F2 0F
MCE.emitByte(0xF2);
Need0FPrefix = true;
@@ -698,10 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(0x0F);
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TF: // F2 0F 38
+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
case X86II::T8: // 0F 38
MCE.emitByte(0x38);
break;
+ case X86II::TAXD: // F2 0F 3A
case X86II::TA: // 0F 3A
MCE.emitByte(0x3A);
break;
@@ -805,8 +806,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
}
assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
- Opcode == X86::WINCALL64pcrel32) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm();
Imm = Imm - MCE.getCurrentPCValue() - 4;
@@ -1003,7 +1003,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
- if (!Desc->isVariadic() && CurOp != NumOps) {
+ if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4a72d154c335..c1a49a764614 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -60,7 +60,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
llvm_unreachable("unknown x86 machine relocation type");
}
}
- return 0;
}
long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
@@ -83,7 +82,6 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
@@ -107,7 +105,6 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
@@ -132,7 +129,6 @@ bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
@@ -146,8 +142,6 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
- else
- assert(0 && "computeRelocation unknown for this relocation type");
- return 0;
+ llvm_unreachable("computeRelocation unknown for this relocation type");
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f912b28eb47b..69752c5c5ddc 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -60,8 +60,8 @@ public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
- X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -258,6 +258,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
Opc = X86ScalarSSEf64 ?
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
+ case MVT::v4f32:
+ Opc = X86::MOVAPSmr;
+ break;
+ case MVT::v2f64:
+ Opc = X86::MOVAPDmr;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Opc = X86::MOVDQAmr;
+ break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
@@ -671,7 +683,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
// Atomic stores need special handling.
- if (cast<StoreInst>(I)->isAtomic())
+ const StoreInst *S = cast<StoreInst>(I);
+
+ if (S->isAtomic())
+ return false;
+
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
return false;
MVT VT;
@@ -709,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
@@ -818,8 +837,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
bool HasAVX = Subtarget->hasAVX();
- bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
- bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+ bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = Subtarget->hasSSE2();
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1510,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1524,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
- GuaranteedTailCallOpt))
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -1557,10 +1576,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
SmallVector<unsigned, 8> Args;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgVals.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgVals.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
// If we're lowering a mem intrinsic instead of a regular call, skip the
@@ -1740,9 +1760,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// If this is a really simple value, emit this with the Value* version
// of X86FastEmitStore. If it isn't simple, we don't want to do this,
// as it can cause us to reevaluate the argument.
- X86FastEmitStore(ArgVT, ArgVal, AM);
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM))
+ return false;
} else {
- X86FastEmitStore(ArgVT, Arg, AM);
+ if (!X86FastEmitStore(ArgVT, Arg, AM))
+ return false;
}
}
}
@@ -1757,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -1771,9 +1793,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64r;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64r;
else
CallOpc = X86::CALL32r;
@@ -1784,9 +1804,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Direct call.
assert(GV && "Not a direct call");
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64pcrel32;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
@@ -1831,10 +1849,15 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
+ CS.paramHasAttr(1, Attribute::StructRet))
NumBytesCallee = 4;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
@@ -2081,7 +2104,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
if (!X86SelectAddress(C, AM))
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
- TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
@@ -2100,7 +2123,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
default: return false;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ Opc = X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2109,7 +2132,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ Opc = X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
@@ -2156,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);
}
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index e3461c82c7a6..ed1707da13d8 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,8 +26,8 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -37,7 +37,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -219,7 +218,7 @@ namespace {
/// getSTReg - Return the X86::ST(i) register which contains the specified
/// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
- return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ return StackTop - 1 - getSlot(RegNo) + X86::ST0;
}
// pushReg - Push the specified FP<n> register onto the stack.
@@ -570,8 +569,8 @@ void FPS::finishBlockStack() {
namespace {
struct TableEntry {
- unsigned from;
- unsigned to;
+ uint16_t from;
+ uint16_t to;
bool operator<(const TableEntry &TE) const { return from < TE.from; }
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
@@ -1644,6 +1643,30 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
return;
}
+ case X86::WIN_FTOL_32:
+ case X86::WIN_FTOL_64: {
+ // Push the operand into ST0.
+ MachineOperand &Op = MI->getOperand(0);
+ assert(Op.isUse() && Op.isReg() &&
+ Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
+ unsigned FPReg = getFPReg(Op);
+ if (Op.isKill())
+ moveToTop(FPReg, I);
+ else
+ duplicateToTop(FPReg, FPReg, I);
+
+ // Emit the call. This will pop the operand.
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("_ftol2")
+ .addReg(X86::ST0, RegState::ImplicitKill)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EDX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ --StackTop;
+
+ break;
+ }
+
case X86::RET:
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index d54f4ae2a2c8..000e3757cf76 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
+//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineModuleInfo &MMI = MF.getMMI();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -79,6 +79,10 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
+static unsigned getLEArOpcode(unsigned is64Bit) {
+ return is64Bit ? X86::LEA64r : X86::LEA32r;
+}
+
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worry about clobbering it.
@@ -91,11 +95,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
if (!F || MF->getMMI().callsEHReturn())
return 0;
- static const unsigned CallerSavedRegs32Bit[] = {
+ static const uint16_t CallerSavedRegs32Bit[] = {
X86::EAX, X86::EDX, X86::ECX, 0
};
- static const unsigned CallerSavedRegs64Bit[] = {
+ static const uint16_t CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
X86::R8, X86::R9, X86::R10, X86::R11, 0
};
@@ -113,7 +117,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
case X86::TCRETURNmi64:
case X86::EH_RETURN:
case X86::EH_RETURN64: {
- SmallSet<unsigned, 8> Uses;
+ SmallSet<uint16_t, 8> Uses;
for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MBBI->getOperand(i);
if (!MO.isReg() || MO.isDef())
@@ -121,11 +125,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ for (const uint16_t *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
Uses.insert(*AsI);
}
- const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
for (; *CS; ++CS)
if (!Uses.count(*CS))
return *CS;
@@ -141,13 +145,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ bool Is64Bit, bool UseLEA,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
+ unsigned Opc;
+ if (UseLEA)
+ Opc = getLEArOpcode(Is64Bit);
+ else
+ Opc = isSub
+ ? getSUBriOpcode(Is64Bit, Offset)
+ : getADDriOpcode(Is64Bit, Offset);
+
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -171,13 +180,21 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
+ MachineInstr *MI = NULL;
+
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ StackPtr, false, isSub ? -ThisVal : ThisVal);
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
Offset -= ThisVal;
}
}
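The UseLEA path exists because LEA can adjust the stack pointer without writing EFLAGS, which is why the EFLAGS-is-dead marking above only applies to the ADD/SUB form (and why Atom, via the lea-sp feature, prefers it). A minimal sketch of the opcode choice, with hypothetical string names standing in for the X86:: opcode enums returned by the helpers above:

#include <cassert>
#include <string>

// Hypothetical stand-in for getLEArOpcode/getSUBriOpcode/getADDriOpcode.
std::string pickSPUpdateOpcode(bool Is64Bit, bool UseLEA, bool IsSub) {
  if (UseLEA)
    return Is64Bit ? "LEA64r" : "LEA32r"; // lea: no EFLAGS def
  return IsSub ? (Is64Bit ? "SUB64ri" : "SUB32ri")
               : (Is64Bit ? "ADD64ri" : "ADD32ri");
}

int main() {
  assert(pickSPUpdateOpcode(true, true, true) == "LEA64r");   // e.g. Atom prologue
  assert(pickSPUpdateOpcode(true, false, true) == "SUB64ri"); // default: sub, EFLAGS dead
  return 0;
}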
@@ -191,7 +208,8 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr) {
if (NumBytes)
*NumBytes += PI->getOperand(2).getImm();
@@ -210,7 +228,7 @@ static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -237,8 +255,8 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -254,7 +272,8 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
int Offset = 0;
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr){
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
@@ -351,20 +370,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
- int Idx = 1;
- for (; *CURegs; ++CURegs, ++Idx)
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
return -1;
}
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
-static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
- unsigned RegCount,
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
// The saved registers are numbered from 1 to 6. In order to encode the order
// in which they were saved, we re-number them according to their place in the
// register order. The re-numbering is relative to the last re-numbered
@@ -385,14 +406,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
};
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
- uint32_t RenumRegs[6];
- for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
+ }
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
unsigned Countless = 0;
- for (unsigned j = 6 - RegCount; j < i; ++j)
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
if (SavedRegs[j] < SavedRegs[i])
++Countless;
@@ -435,8 +463,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
-static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
static const unsigned CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
@@ -446,18 +475,21 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
// Encode the registers in the order they were saved, 3-bits per register. The
- // registers are numbered from 1 to 6.
+ // registers are numbered from 1 to CU_NUM_SAVED_REGS.
uint32_t RegEnc = 0;
- for (int I = 5; I >= 0; --I) {
+ for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
unsigned Reg = SavedRegs[I];
- if (Reg == 0) break;
+ if (Reg == 0) continue;
+
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
- if (CURegNum == -1)
- return ~0U;
- RegEnc |= (CURegNum & 0x7) << (5 - I);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
}
- assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
return RegEnc;
}
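A worked example of the with-frame packing above: registers are encoded three bits each, with registers pushed later landing in the lower bit positions (the Idx counter), and register numbers assumed from compact_unwind_encoding.h (RBX=1, R12=2, ..., RBP=6). The sketch stores those numbers directly instead of going through getCompactUnwindRegNum:

#include <cassert>

int main() {
  // Prologue pushed RBX first, then R12 (numbers per the assumption above).
  unsigned SavedRegs[6] = {1 /*RBX*/, 2 /*R12*/, 0, 0, 0, 0};
  unsigned RegEnc = 0;
  for (int I = 5, Idx = 0; I != -1; --I) {
    if (SavedRegs[I] == 0) continue;
    RegEnc |= (SavedRegs[I] & 0x7) << (Idx++ * 3);
  }
  assert(RegEnc == 0xA); // R12 in bits 0-2, RBX in bits 3-5
  return 0;
}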
@@ -466,14 +498,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
bool Is64Bit = STI.is64Bit();
bool HasFP = hasFP(MF);
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- int SavedRegIdx = 6;
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
unsigned OffsetSize = (Is64Bit ? 8 : 4);
@@ -481,14 +510,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned PushInstrSize = 1;
unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
unsigned StackDivide = (Is64Bit ? 8 : 4);
unsigned InstrOffset = 0;
- unsigned CFAOffset = 0;
unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
bool ExpectEnd = false;
@@ -504,10 +532,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (--SavedRegIdx < 0) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
- SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
- CFAOffset += OffsetSize;
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
InstrOffset += PushInstrSize;
} else if (Opc == MoveInstr) {
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -516,12 +544,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (DstReg != FramePtr || SrcReg != StackPtr)
return 0;
- CFAOffset = 0;
+ StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
+ SavedRegIdx = 0;
InstrOffset += MoveInstrSize;
- } else if (Opc == SubtractInstr) {
- if (StackAdjust)
- // We all ready have a stack pointer adjustment.
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
return 0;
if (!MI.getOperand(0).isReg() ||
@@ -532,7 +562,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// %RSP<def> = SUB64ri8 %RSP, 48
return 0;
- StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
ExpectEnd = true;
}
@@ -540,28 +570,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// Encode that we are using EBP/RBP as the frame pointer.
uint32_t CompactUnwindEncoding = 0;
- CFAOffset /= StackDivide;
+ StackAdjust /= StackDivide;
if (HasFP) {
- if ((CFAOffset & 0xFF) != CFAOffset)
+ if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
return 0;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U)
- return 0;
+ if (RegEnc == ~0U) return 0;
CompactUnwindEncoding |= 0x01000000;
- CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & 0x7FFF;
} else {
- unsigned FullOffset = CFAOffset + StackAdjust;
- if ((FullOffset & 0xFF) == FullOffset) {
- // Frameless stack.
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
CompactUnwindEncoding |= 0x02000000;
- CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
- if ((CFAOffset & 0x7) != CFAOffset)
+ if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
return 0;
@@ -572,16 +604,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// instruction.
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
- // Encode any extra stack stack changes (done via push instructions).
- CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ // Encode any extra stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
}
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
// Get the encoding of the saved registers when we don't have a frame
// pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
- 6 - SavedRegIdx,
- Is64Bit);
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
if (RegEnc == ~0U) return 0;
+
+ // Encode the register encoding.
CompactUnwindEncoding |= RegEnc & 0x3FF;
}
@@ -608,6 +645,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
bool IsWin64 = STI.isTargetWin64();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -637,10 +675,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// stack pointer (we fit in the Red Zone).
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !EnableSegmentedStacks) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -861,7 +899,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
@@ -873,7 +911,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
@@ -917,6 +955,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -977,7 +1016,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->getDesc().isTerminator())
+ !PI->isTerminator())
break;
--MBBI;
@@ -997,7 +1036,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// We cannot use LEA here, because stack pointer was realigned. We need to
// deallocate local frame back.
if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
+ *RegInfo);
MBBI = prior(LastCSPop);
}
@@ -1018,7 +1058,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1053,7 +1093,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1097,7 +1137,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
}
}
@@ -1280,31 +1320,35 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on the platform and the properties of the
+/// function, either one or two registers will be needed. Set Primary to true
+/// for the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
- if (Is64Bit) {
- return X86::R11;
- } else {
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
- bool IsNested = HasNestArgument(&MF);
-
- if (CallingConvention == CallingConv::X86_FastCall) {
- if (IsNested) {
- report_fatal_error("Segmented stacks does not support fastcall with "
- "nested function.");
- return -1;
- } else {
- return X86::EAX;
- }
- } else {
- if (IsNested)
- return X86::EDX;
- else
- return X86::ECX;
- }
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ if (Is64Bit)
+ return Primary ? X86::R11 : X86::R12;
+
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall ||
+ CallingConvention == CallingConv::Fast) {
+ if (IsNested)
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return Primary ? X86::EAX : X86::ECX;
}
+ if (IsNested)
+ return Primary ? X86::EDX : X86::EAX;
+ return Primary ? X86::ECX : X86::EAX;
}
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
@@ -1316,14 +1360,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
+ !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -1336,26 +1381,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
// allocMBB needs to be last (terminating) instruction.
- MachineBasicBlock *restoreR10MBB = NULL;
- if (IsNested)
- restoreR10MBB = MF.CreateMachineBasicBlock();
for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
e = prologueMBB.livein_end(); i != e; i++) {
allocMBB->addLiveIn(*i);
checkMBB->addLiveIn(*i);
-
- if (IsNested)
- restoreR10MBB->addLiveIn(*i);
}
- if (IsNested) {
+ if (IsNested)
allocMBB->addLiveIn(X86::R10);
- restoreR10MBB->addLiveIn(X86::RAX);
- }
- if (IsNested)
- MF.push_front(restoreR10MBB);
MF.push_front(allocMBB);
MF.push_front(checkMBB);
@@ -1364,28 +1399,99 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ } else if (ST->isTargetFreeBSD()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x18;
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- TlsReg = X86::GS;
- TlsOffset = 0x30;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x48 + 90*4;
+ } else if (ST->isTargetWin32()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x14; // pvArbitrary, reserved for application use
+ } else if (ST->isTargetFreeBSD()) {
+ report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+ if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
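A rough C++ sketch of the check that checkMBB emits above (an illustration, not part of the imported patch): StackLimit stands for the value loaded from TlsReg:TlsOffset, 256 is kSplitStackAvailable, and the unsigned comparison mirrors the JA branch.

#include <cstdint>

// Returns true when the current stacklet already has enough room, i.e. the
// JA above is taken and __morestack is skipped.
static bool fitsOnCurrentStacklet(uintptr_t SP, uintptr_t FrameSize,
                                  uintptr_t StackLimit) {
  // Small frames compare SP directly; larger frames compare SP - FrameSize.
  uintptr_t Probe = FrameSize < 256 ? SP : SP - FrameSize;
  return Probe > StackLimit;
}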
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -1403,9 +1509,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.getRegInfo().setPhysRegUsed(X86::R10);
MF.getRegInfo().setPhysRegUsed(X86::R11);
} else {
- // Since we'll call __morestack, stack alignment needs to be preserved.
- BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
@@ -1420,23 +1523,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__morestack");
- // __morestack only seems to remove 8 bytes off the stack. Add back the
- // additional 8 bytes we added before pushing the arguments.
- if (!Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
- BuildMI(allocMBB, DL, TII.get(X86::RET));
-
if (IsNested)
- BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10)
- .addReg(X86::RAX);
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
- if (IsNested) {
- allocMBB->addSuccessor(restoreR10MBB);
- restoreR10MBB->addSuccessor(&prologueMBB);
- } else {
- allocMBB->addSuccessor(&prologueMBB);
- }
+ allocMBB->addSuccessor(&prologueMBB);
checkMBB->addSuccessor(allocMBB);
checkMBB->addSuccessor(&prologueMBB);
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 6f490640b4ed..d55a49763a4d 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -1,4 +1,4 @@
-//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 02b0ff26032b..8e2b1d6b5dd2 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -21,7 +21,6 @@
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,11 +31,11 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -540,7 +539,7 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
- Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
TII->get(CallOp)).addExternalSymbol("__main");
}
@@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
- if (Subtarget->is64Bit() &&
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- (M == CodeModel::Small || M == CodeModel::Kernel) &&
- // Base and index reg must be 0 in order to use %rip as base and lowering
- // must allow RIP.
- !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
@@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
}
// Handle the case when globals fit in our immediate field: This is true for
- // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
- // mode, this results in a non-RIP-relative computation.
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- ((M == CodeModel::Small || M == CodeModel::Kernel) &&
- TM.getRelocationModel() == Reloc::Static)) {
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
@@ -725,6 +725,213 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
+// Insert a node into the DAG at least before the Pos node's position. This
+// will reposition the node as needed, and will assign it a node ID that is <=
+// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
+// IDs! The selection DAG must no longer depend on their uniqueness when this
+// is used.
+static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
+ DAG.RepositionNode(Pos.getNode(), N.getNode());
+ N.getNode()->setNodeId(Pos.getNode()->getNodeId());
+ }
+}
+
+// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
+// allows us to convert the shift and and into an h-register extract and
+// a scaled index. Returns false if the simplification is performed.
+static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse())
+ return true;
+
+ int ScaleLog = 8 - Shift.getConstantOperandVal(1);
+ if (ScaleLog <= 0 || ScaleLog >= 4 ||
+ Mask != (0xffu << ScaleLog))
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue Eight = DAG.getConstant(8, MVT::i8);
+ SDValue NewMask = DAG.getConstant(0xff, VT);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, Eight);
+ InsertDAGNode(DAG, N, Srl);
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, And);
+ InsertDAGNode(DAG, N, ShlCount);
+ InsertDAGNode(DAG, N, Shl);
+ DAG.ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+}
+
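A small worked check of the equivalence this fold relies on, using an assumed shift amount of 5 (so C1 == 3) and the required mask 0xff << 3; the masked value becomes the index and the remaining shift becomes the scale. Illustrative only, not part of the imported patch.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x12345678;
  uint64_t Orig  = (X >> 5) & (0xffull << 3);  // "(X >> (8-C1)) & C2" form
  uint64_t Index = (X >> 8) & 0xff;            // becomes AM.IndexReg (h-register)
  assert(Orig == (Index << 3));                // AM.Scale == 1 << 3 == 8
  return 0;
}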
+// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
+// allows us to fold the shift into this addressing mode. Returns false if the
+// transform succeeded.
+static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SHL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // the isel mechanism requires their node IDs to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ return true;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, NewAnd);
+ InsertDAGNode(DAG, N, NewShift);
+ DAG.ReplaceAllUsesWith(N, NewShift);
+
+ AM.Scale = 1 << ShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
+
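A small worked check of the rewrite this helper performs, with an assumed shift of 2 and mask 0xfc; the trailing shift is what ends up in AM.Scale. Illustrative only, not part of the imported patch.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0xabcd;
  uint64_t Orig   = (X << 2) & 0xfc;   // "(X << C1) & C2" form
  uint64_t NewAnd = X & (0xfc >> 2);   // becomes AM.IndexReg
  assert(Orig == (NewAnd << 2));       // the << 2 moves into AM.Scale == 4
  return 0;
}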
+// Implement some heroics to detect shifts of masked values where the mask can
+// be replaced by extending the shift and undoing that in the addressing mode
+// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
+// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
+// the addressing mode. This results in code such as:
+//
+// int f(short *y, int *lookup_table) {
+// ...
+// return *y + lookup_table[*y >> 11];
+// }
+//
+// Turning into:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $11, %ecx
+// addl (%rsi,%rcx,4), %eax
+//
+// Instead of:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $9, %ecx
+// andl $124, %rcx
+// addl (%rsi,%rcx), %eax
+//
+// Note that this function assumes the mask is provided as a mask *after* the
+// value is shifted. The input chain may or may not match that, but computing
+// such a mask is trivial.
+static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ unsigned MaskLZ = CountLeadingZeros_64(Mask);
+ unsigned MaskTZ = CountTrailingZeros_64(Mask);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = MaskTZ;
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ // We also need to ensure that mask is a continuous run of bits.
+ if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+
+ // Scale the leading zero count down based on the actual size of the value.
+ // Also scale it down based on the size of the shift.
+ MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+
+ // The final check is to ensure that any masked out high bits of X are
+ // already known to be zero. Otherwise, the mask has a semantic impact
+ // other than masking out a couple of low bits. Unfortunately, because of
+ // the mask, zero extensions will be removed from operands in some cases.
+ // This code works extra hard to look through extensions because we can
+ // replace them with zero extensions cheaply if necessary.
+ bool ReplacingAnyExtend = false;
+ if (X.getOpcode() == ISD::ANY_EXTEND) {
+ unsigned ExtendBits =
+ X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ // Assume that we'll replace the any-extend with a zero-extend, and
+ // narrow the search to the extended value.
+ X = X.getOperand(0);
+ MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
+ ReplacingAnyExtend = true;
+ }
+ APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
+ MaskLZ);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
+ if (MaskedHighBits != KnownZero) return true;
+
+ // We've identified a pattern that can be transformed into a single shift
+ // and an addressing mode. Make it so.
+ EVT VT = N.getValueType();
+ if (ReplacingAnyExtend) {
+ assert(X.getValueType() != VT);
+ // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
+ SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
+ InsertDAGNode(DAG, N, NewX);
+ X = NewX;
+ }
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewSRLAmt);
+ InsertDAGNode(DAG, N, NewSRL);
+ InsertDAGNode(DAG, N, NewSHLAmt);
+ InsertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewSRL;
+ return false;
+}
+
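The arithmetic behind the lookup_table example in the comment above, sketched with GCC/Clang builtins standing in for CountTrailingZeros_64/CountLeadingZeros_64/CountTrailingOnes_64 (an assumption about the toolchain, not part of the imported patch):

#include <cassert>
#include <cstdint>

int main() {
  // DAGCombine turns lookup_table[*y >> 11] into (y >> 9) & 0x7c.
  uint64_t Mask = 0x7c;
  unsigned ShiftAmt = 9;
  unsigned MaskTZ = __builtin_ctzll(Mask);                     // 2
  unsigned MaskLZ = __builtin_clzll(Mask);                     // 57
  unsigned TrailingOnes = __builtin_ctzll(~(Mask >> MaskTZ));  // 5
  // Contiguity test used by FoldMaskAndShiftToScale:
  assert(TrailingOnes + MaskTZ + MaskLZ == 64);
  // Resulting addressing mode: index = y >> (ShiftAmt + MaskTZ) == y >> 11,
  // AM.Scale == 1 << MaskTZ == 4.
  (void)ShiftAmt;
  return 0;
}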
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
DebugLoc dl = N.getDebugLoc();
@@ -814,6 +1021,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
}
+ case ISD::SRL: {
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue And = N.getOperand(0);
+ if (And.getOpcode() != ISD::AND) break;
+ SDValue X = And.getOperand(0);
+
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
+
+ // The mask used for the transform is expected to be post-shift, but we
+ // found the shift first so just apply the shift to the mask before passing
+ // it down.
+ if (!isa<ConstantSDNode>(N.getOperand(1)) ||
+ !isa<ConstantSDNode>(And.getOperand(1)))
+ break;
+ uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
+
+ // Try to fold the mask and shift into the scale, and return false if we
+ // succeed.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+ return false;
+ break;
+ }
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
// A mul_lohi where we need the low part can be folded as a plain multiply.
@@ -917,16 +1151,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- if (Zero.getNode()->getNodeId() == -1 ||
- Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Zero.getNode());
- Zero.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Neg.getNode()->getNodeId() == -1 ||
- Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Neg.getNode());
- Neg.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ InsertDAGNode(*CurDAG, N, Zero);
+ InsertDAGNode(*CurDAG, N, Neg);
return false;
}
@@ -981,121 +1207,34 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
- SDValue Shift = N.getOperand(0);
- if (Shift.getNumOperands() != 2) break;
-
// Scale must not be used already.
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
SDValue X = Shift.getOperand(0);
- ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
- ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
- if (!C1 || !C2) break;
-
- // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
- // allows us to convert the shift and and into an h-register extract and
- // a scaled index.
- if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
- unsigned ScaleLog = 8 - C1->getZExtValue();
- if (ScaleLog > 0 && ScaleLog < 4 &&
- C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
- SDValue Eight = CurDAG->getConstant(8, MVT::i8);
- SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
- SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
- X, Eight);
- SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
- Srl, Mask);
- SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
- SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
- And, ShlCount);
-
- // Insert the new nodes into the topological ordering.
- if (Eight.getNode()->getNodeId() == -1 ||
- Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), Eight.getNode());
- Eight.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (Mask.getNode()->getNodeId() == -1 ||
- Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), Mask.getNode());
- Mask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (Srl.getNode()->getNodeId() == -1 ||
- Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
- Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (And.getNode()->getNodeId() == -1 ||
- And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), And.getNode());
- And.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (ShlCount.getNode()->getNodeId() == -1 ||
- ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
- ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Shl.getNode()->getNodeId() == -1 ||
- Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Shl.getNode());
- Shl.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- CurDAG->ReplaceAllUsesWith(N, Shl);
- AM.IndexReg = And;
- AM.Scale = (1 << ScaleLog);
- return false;
- }
- }
- // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
- // allows us to fold the shift into this addressing mode.
- if (Shift.getOpcode() != ISD::SHL) break;
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
- // Not likely to be profitable if either the AND or SHIFT node has more
- // than one use (unless all uses are for address computation). Besides,
- // isel mechanism requires their node ids to be reused.
- if (!N.hasOneUse() || !Shift.hasOneUse())
- break;
-
- // Verify that the shift amount is something we can fold.
- unsigned ShiftCst = C1->getZExtValue();
- if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
-
- // Get the new AND mask, this folds to a constant.
- SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
- SDValue(C2, 0), SDValue(C1, 0));
- SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
- NewANDMask);
- SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
- NewAND, SDValue(C1, 0));
+ uint64_t Mask = N.getConstantOperandVal(1);
- // Insert the new nodes into the topological ordering.
- if (C1->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), C1);
- C1->setNodeId(X.getNode()->getNodeId());
- }
- if (NewANDMask.getNode()->getNodeId() == -1 ||
- NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
- NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (NewAND.getNode()->getNodeId() == -1 ||
- NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
- NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (NewSHIFT.getNode()->getNodeId() == -1 ||
- NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
- NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ // Try to fold the mask and shift into an extract and scale.
+ if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
- CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
-
- AM.Scale = 1 << ShiftCst;
- AM.IndexReg = NewAND;
- return false;
+ // Try to fold the mask and shift directly into the scale.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to swap the mask and shift to place shifts which can be done as
+ // a scale on the outside of the mask.
+ if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+ break;
}
}
@@ -1515,7 +1654,7 @@ enum AtomicSz {
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
@@ -1709,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for the {load; increment or decrement; store} read-modify-write
+/// transformation.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+ // is the value stored the result of a DEC or INC?
+ if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+ // is the stored value result 0 of the load?
+ if (StoredVal.getResNo() != 0) return false;
+
+ // are there other uses of the loaded value than the inc or dec?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+ // is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+ // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ LdVT != MVT::i8)
+ return false;
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ continue;
+ }
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+ MVT::Other, &ChainOps[0], ChainOps.size());
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
+/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+ if (Opc == X86ISD::DEC) {
+ if (LdVT == MVT::i64) return X86::DEC64m;
+ if (LdVT == MVT::i32) return X86::DEC32m;
+ if (LdVT == MVT::i16) return X86::DEC16m;
+ if (LdVT == MVT::i8) return X86::DEC8m;
+ } else {
+ assert(Opc == X86ISD::INC && "unrecognized opcode");
+ if (LdVT == MVT::i64) return X86::INC64m;
+ if (LdVT == MVT::i32) return X86::INC32m;
+ if (LdVT == MVT::i16) return X86::INC16m;
+ if (LdVT == MVT::i8) return X86::INC8m;
+ }
+ llvm_unreachable("unrecognized size for LdVT");
+}
+
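A source-level shape that can give rise to the {load; DEC; store} chain the new ISD::STORE case below looks for (a sketch, not part of the imported patch); whether it is actually fused into a single memory decrement depends on how the EFLAGS produced by the decrement are used, per the FIXME in that case.

// Illustrative only: the counter is loaded, decremented, stored back, and the
// branch consumes the decrement's flags, which is the chain matched above.
void drain(int *counter, void (*work)(void)) {
  while (--*counter)
    work();
}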
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
@@ -1829,7 +2058,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
- break;
}
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
@@ -2114,7 +2342,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
HasNoSignedComparisonUses(Node))
// Look past the truncate if CMP is the only use of it.
N0 = N0.getOperand(0);
- if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ if ((N0.getNode()->getOpcode() == ISD::AND ||
+ (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
+ N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8 &&
X86::isZeroNode(N1)) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
@@ -2129,7 +2359,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
@@ -2158,7 +2388,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Put the value in an ABCD register.
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
@@ -2214,6 +2444,56 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
+ // The DEC64m tablegen pattern is currently not able to match the case where
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
+ // node in the pattern to the result node, probably with a new keyword.
+ // For example, we currently have this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (implicit EFLAGS)]>;
+ // but may instead need something like this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (transferrable EFLAGS)]>;
+
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
+ SDValue StoredVal = StoreNode->getOperand(1);
+ unsigned Opc = StoredVal->getOpcode();
+
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
+ break;
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
+ MemOp[0] = StoreNode->getMemOperand();
+ MemOp[1] = LoadNode->getMemOperand();
+ const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
+ Node->getDebugLoc(),
+ MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ Result->setMemRefs(MemOp, MemOp + 2);
+
+ ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
+ ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
+
+ return Result;
+ }
}
SDNode *ResNode = SelectCode(Node);
@@ -2254,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7c8ce177ecd3..9b83aade3381 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
+#include "X86ISelLowering.h"
#include "X86.h"
#include "X86InstrBuilder.h"
-#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "Utils/X86ShuffleDecode.h"
@@ -35,25 +35,21 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <bitset>
using namespace llvm;
-using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -61,17 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue Insert128BitVector(SDValue Result,
- SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
-static SDValue Extract128BitVector(SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
@@ -169,8 +154,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasXMMInt();
- X86ScalarSSEf32 = Subtarget->hasXMM();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
@@ -186,8 +171,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
+ // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling.
if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
+ else if (Subtarget->isAtom())
+ setSchedulingPreference(Sched::Hybrid);
else
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -199,15 +187,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+
+ // The _ftol2 runtime function has an unusual calling conv, which
+ // is modeled by a special pseudo-instruction.
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
}
if (Subtarget->isTargetDarwin()) {
@@ -256,8 +247,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
- } else if (!UseSoftFloat) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ } else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -271,7 +262,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -314,7 +305,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -327,6 +318,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}
+ if (isTargetFTOL()) {
+ // Use the _ftol2 runtime function, which has a pseudo-instruction
+ // to handle its weird calling convention.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ }
+
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
@@ -379,10 +376,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ // Promote the i8 variants and force them on up to i32 which has a shorter
+ // encoding.
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
if (Subtarget->hasBMI()) {
- setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
- setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
if (Subtarget->is64Bit())
@@ -390,13 +395,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (Subtarget->hasLZCNT()) {
+ // When promoting the i8 variants, force them to i32 for a shorter
+ // encoding.
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ }
}
if (Subtarget->hasPOPCNT()) {
@@ -459,7 +478,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
@@ -538,14 +557,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
- else if (EnableSegmentedStacks)
+ else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
- if (!UseSoftFloat && X86ScalarSSEf64) {
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -577,7 +596,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -606,11 +625,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
@@ -621,7 +640,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
@@ -640,7 +659,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -659,11 +678,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
@@ -715,7 +739,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
@@ -749,7 +775,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && Subtarget->hasMMX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -786,7 +812,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasXMM()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -803,7 +829,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -909,7 +935,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -924,10 +950,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- // Can turn SHL into an integer multiply.
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v16i8, Custom);
-
setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
@@ -948,30 +970,47 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ // FIXME: these should be Legal, but that's only for the case where
+ // the index is constant. For now, custom expand to deal with that.
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
}
- if (Subtarget->hasXMMInt()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ if (Subtarget->hasSSE2()) {
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i8, Custom);
+
+ if (Subtarget->hasAVX2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SHL, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Legal);
+ } else {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ }
}
- if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE42())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- if (!UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
@@ -1008,18 +1047,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i16, Custom);
setOperationAction(ISD::SHL, MVT::v32i8, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
@@ -1030,25 +1065,60 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ if (Subtarget->hasAVX2()) {
+ setOperationAction(ISD::ADD, MVT::v4i64, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v16i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v16i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SRL, MVT::v4i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Legal);
+ setOperationAction(ISD::SHL, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+ } else {
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
- setOperationAction(ISD::ADD, MVT::v4i64, Custom);
- setOperationAction(ISD::ADD, MVT::v8i32, Custom);
- setOperationAction(ISD::ADD, MVT::v16i16, Custom);
- setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v4i64, Custom);
- setOperationAction(ISD::SUB, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v16i16, Custom);
- setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::MUL, MVT::v4i64, Custom);
- setOperationAction(ISD::MUL, MVT::v8i32, Custom);
- setOperationAction(ISD::MUL, MVT::v16i16, Custom);
- // Don't lower v32i8 because there is no 128-bit byte mul
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ }
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -1099,7 +1169,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// of this type with custom code.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
+ Custom);
}
// We want to custom lower some of our intrinsics.
@@ -1137,7 +1208,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
@@ -1152,9 +1222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasBMI())
+ setTargetDAGCombine(ISD::XOR);
computeRegisterProperties();
@@ -1166,10 +1240,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- setPrefLoopAlignment(16);
+ setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(4); // 2^4 bytes.
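  // Worked example of the encoding used above (assuming the alignment
  // setters take a log2 value, as the "2^4 bytes" comments indicate):
  // passing 4 requests 1 << 4 == 16-byte alignment, so the loop-alignment
  // preference stays at 16 bytes while switching to the log2 encoding.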
}
@@ -1219,7 +1293,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
}
unsigned Align = 4;
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1230,7 +1304,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -1239,31 +1313,34 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
- if (NonScalarIntSafe &&
+ if (IsZeroVal &&
!F->hasFnAttr(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->hasAVX() &&
- Subtarget->getStackAlignment() >= 32)
- return MVT::v8f32;
- if (Subtarget->hasXMMInt())
+ if (Subtarget->getStackAlignment() >= 32) {
+ if (Subtarget->hasAVX2())
+ return MVT::v8i32;
+ if (Subtarget->hasAVX())
+ return MVT::v8f32;
+ }
+ if (Subtarget->hasSSE2())
return MVT::v4i32;
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasXMMInt()) {
+ Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
@@ -1428,14 +1505,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1461,7 +1538,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
@@ -1501,15 +1578,21 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
-bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
+ SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
- if (Copy->getOpcode() != ISD::CopyToReg &&
- Copy->getOpcode() != ISD::FP_EXTEND)
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
@@ -1520,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
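// Sketch of the new contract, as read from the code above: when the value's
// only user is a glue-free CopyToReg (or an FP_EXTEND) that feeds the
// function's return, the copy's incoming chain is handed back through the
// Chain out-parameter, presumably so the caller can fold the node into a
// tail-call sequence.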
EVT
@@ -1561,7 +1648,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -1651,7 +1738,7 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall())
+ if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
CallSite CS(CI);
@@ -1664,7 +1751,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
@@ -1678,7 +1766,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -1704,7 +1793,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
}
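  // Note on the extra 'false' threaded through these DAG.getLoad() calls
  // (an assumption based on the signature change, not spelled out in the
  // patch): the load now takes one more boolean flag -- likely an
  // "invariant load" marker -- ahead of the alignment, and this code
  // conservatively passes false for it.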
}
@@ -1728,6 +1817,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWindows = Subtarget->isTargetWindows();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
@@ -1758,7 +1848,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
@@ -1807,7 +1897,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1828,7 +1918,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv))
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -1842,17 +1933,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
- static const unsigned GPR64ArgRegsWin64[] = {
+ static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned GPR64ArgRegs64Bit[] = {
+ static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegs64Bit[] = {
+ static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs;
+ const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
@@ -1865,17 +1956,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
- NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
+ TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasSSE1())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -1892,8 +1986,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(
@@ -1953,12 +2047,14 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ ArgsAreStructReturn(Ins))
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2006,7 +2102,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
@@ -2033,7 +2129,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2042,9 +2138,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsWindows = Subtarget->isTargetWindows();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
+ if (MF.getTarget().Options.DisableTailCalls)
+ isTailCall = false;
+
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
@@ -2053,7 +2153,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!GuaranteedTailCallOpt && isTailCall)
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -2081,7 +2181,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2231,12 +2332,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
- assert((Subtarget->hasXMM() || !NumXMMRegs)
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2260,7 +2361,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
@@ -2368,7 +2469,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ExtraLoad)
Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(),
- false, false, 0);
+ false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -2421,6 +2522,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Is64Bit && isVarArg && !IsWin64)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2440,12 +2547,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
+ // For MSVC Win32 targets, the caller pops the hidden struct pointer.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
@@ -2598,7 +2708,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -2641,9 +2751,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
- // Therefore if it's not used by the call it is not safe to optimize this into
- // a sibcall.
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+ // stack. Therefore, if it's not used by the call it is not safe to optimize
+ // this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
@@ -2785,9 +2895,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::SHUFPD:
+ case X86ISD::SHUFP:
case X86ISD::PALIGN:
- case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -2798,34 +2907,16 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
return true;
}
- return false;
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SelectionDAG &DAG) {
+ SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
@@ -2833,39 +2924,32 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
-
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
+ case X86ISD::VPERMILP:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
-
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, SDValue V2, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGN:
- case X86ISD::SHUFPD:
- case X86ISD::SHUFPS:
- case X86ISD::VPERM2F128:
+ case X86ISD::SHUFP:
+ case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
@@ -2879,25 +2963,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
- return SDValue();
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
@@ -3092,17 +3161,6 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
-/// isUndefOrInRange - Return true if every element in Mask, begining
-/// from position Pos and ending in Pos+Size, falls within the specified
-/// range (L, L+Pos]. or is undef.
-static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
- int Pos, int Size, int Low, int Hi) {
- for (int i = Pos, e = Pos+Size; i != e; ++i)
- if (!isUndefOrInRange(Mask[i], Low, Hi))
- return false;
- return true;
-}
-
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -3114,7 +3172,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
-static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
int Pos, int Size, int Low) {
for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
@@ -3125,7 +3183,7 @@ static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -3133,302 +3191,188 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
return false;
}
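// Worked example (illustrative): for MVT::v4i32 the mask <2,3,0,1> references
// only the first operand (every index is below 4), so isPSHUFDMask() accepts
// it, whereas <2,3,4,1> pulls element 4 from the second operand and is
// rejected.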
-bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFDMask(M, N->getValueType(0));
-}
-
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
// Lower quadword copied in order or undef.
- for (int i = 0; i != 4; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
- return false;
+ if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
+ return false;
// Upper quadword shuffled.
- for (int i = 4; i != 8; ++i)
+ for (unsigned i = 4; i != 8; ++i)
if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
return false;
return true;
}
-bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFHWMask(M, N->getValueType(0));
-}
-
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
// Upper quadword copied in order.
- for (int i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
- return false;
+ if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
+ return false;
// Lower quadword shuffled.
- for (int i = 0; i != 4; ++i)
+ for (unsigned i = 0; i != 4; ++i)
if (Mask[i] >= 4)
return false;
return true;
}
-bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFLWMask(M, N->getValueType(0));
-}
-
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool hasSSSE3OrAVX) {
- int i, e = VT.getVectorNumElements();
- if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
+ (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
return false;
- // Do not handle v2i64 / v2f64 shuffles with palignr.
- if (e < 4 || !hasSSSE3OrAVX)
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ // Do not handle 64-bit element shuffles with palignr.
+ if (NumLaneElts == 2)
return false;
- for (i = 0; i != e; ++i)
- if (Mask[i] >= 0)
- break;
+ for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
+ unsigned i;
+ for (i = 0; i != NumLaneElts; ++i) {
+ if (Mask[i+l] >= 0)
+ break;
+ }
- // All undef, not a palignr.
- if (i == e)
- return false;
+ // Lane is all undef, go to next lane
+ if (i == NumLaneElts)
+ continue;
- // Make sure we're shifting in the right direction.
- if (Mask[i] <= i)
- return false;
+ int Start = Mask[i+l];
- int s = Mask[i] - i;
+ // Make sure it's in this lane in one of the sources
+ if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != e; ++i) {
- int m = Mask[i];
- if (m >= 0 && m != s+i)
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
return false;
- }
- return true;
-}
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPSY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
+ // Correct second source to be contiguous with first source
+ if (Start >= (int)NumElts)
+ Start -= NumElts - NumLaneElts;
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
- return false;
+ // Make sure we're shifting in the right direction.
+ if (Start <= (int)(i+l))
+ return false;
- if (NumElems != 8)
- return false;
+ Start -= i;
- // VSHUFPSY divides the resulting vector into 4 chunks.
- // The sources are also splitted into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
- // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9
- //
- // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
- // Y3..Y0, Y3..Y0, X3..X0, X3..X0
- //
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != NumLaneElts; ++i) {
+ int Idx = Mask[i+l];
- // The mask of the second half must be the same as the first but with
- // the appropriate offsets. This works in the same way as VPERMILPS
- // works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
+ // Make sure it's in this lane
+ if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
+
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+
+ if (Idx >= (int)NumElts)
+ Idx -= NumElts - NumLaneElts;
+ if (!isUndefOrEqual(Idx, Start+i))
+ return false;
+
+ }
}
return true;
}
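// Worked example (illustrative, assuming SSSE3 is available): for MVT::v8i16
// there is a single 128-bit lane of 8 elements, and the mask
// <1,2,3,4,5,6,7,8> is accepted -- it reads a consecutive run starting at
// element 1 of the first source and continuing into element 8 (element 0 of
// the second source), i.e. an aligned window into the concatenated sources,
// which is what PALIGNR extracts.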
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
- "Only supports v8i32 and v8f32 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
continue;
- // The mask of the first half must be equal to the second one.
- unsigned Shamt = (i%HalfSize)*2;
- unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << Shamt;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
}
-
- return Mask;
}
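// Illustrative example: with NumElems == 4, commuting the mask <0,9,2,11>
// yields <4,5,6,7> -- indices that referred to the first vector now point
// into the second one and vice versa, matching a swap of the two shuffle
// operands.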
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
-
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128/256-bit
+/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
+/// reverse of what x86 shuffles want.
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+ bool Commuted = false) {
+ if (!HasAVX && VT.getSizeInBits() == 256)
return false;
- if (NumElems != 4)
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElems = NumElems/NumLanes;
+
+ if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
// VSHUFPSY divides the resulting vector into 4 chunks.
  // The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ // VSHUFPDY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
- // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- for (int i = QuarterSize*3; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
-
- return true;
-}
-
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
- "Only supports v4i64 and v4f64 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
- continue;
- int Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << i;
+ unsigned HalfLaneElems = NumLaneElems/2;
+ for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ int Idx = Mask[i+l];
+ unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
+ if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the
+ // first but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+ }
}
- return Mask;
-}
-
-/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
- return false;
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
-
- return true;
-}
-
-bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isSHUFPMask(M, N->getValueType(0));
-}
-
-/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
return true;
}
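// Worked example (illustrative): for MVT::v4f32 (one lane, HalfLaneElems == 2)
// the mask <0,1,4,5> takes its low half from the first source and its high
// half from the second, so isSHUFPMask() accepts it with Commuted == false;
// the swapped form <4,5,0,1> is only accepted when Commuted == true.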
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
@@ -3438,17 +3382,16 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
return false;
// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
- return isUndefOrEqual(N->getMaskElt(0), 6) &&
- isUndefOrEqual(N->getMaskElt(1), 7) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ return isUndefOrEqual(Mask[0], 6) &&
+ isUndefOrEqual(Mask[1], 7) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
}
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
@@ -3457,26 +3400,29 @@ bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
if (NumElems != 4)
return false;
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ return isUndefOrEqual(Mask[0], 2) &&
+ isUndefOrEqual(Mask[1], 3) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i], i + NumElems))
return false;
- for (unsigned i = NumElems/2; i < NumElems; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
@@ -3484,19 +3430,19 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
if ((NumElems != 2 && NumElems != 4)
- || N->getValueType(0).getSizeInBits() > 128)
+ || VT.getSizeInBits() > 128)
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems))
return false;
return true;
@@ -3504,14 +3450,15 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3519,11 +3466,9 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = Start, j = s * NumLaneElts;
- i != End;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3537,30 +3482,22 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
}
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
return true;
}
-bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
-}
-
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3568,11 +3505,9 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts;
for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
- i != End; i += 2, ++j) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
if (!isUndefOrEqual(BitI, j))
@@ -3585,42 +3520,39 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
}
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
return true;
}
-bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
-}
-
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
- if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
// FIXME: Need a better way to get rid of this, there's no latency difference
  // between UNPCKLPD and MOVDDUP, the latter should always be checked first and
// the former later. We should also remove the "_undef" special mask.
- if (NumElems == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && VT.getSizeInBits() == 256)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits() / 128;
- unsigned NumLaneElts = NumElems / NumLanes;
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
- i != NumLaneElts * (s + 1);
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3635,81 +3567,77 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
return true;
}
-bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
-}
-
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
- if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
- for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
return true;
}
-bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
-}
-
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
+ if (VT.getSizeInBits() == 256)
+ return false;
- int NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
- for (int i = 1; i < NumElts; ++i)
+ for (unsigned i = 1; i != NumElts; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
}
-bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isMOVLMask(M, N->getValueType(0));
-}
-
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example: this
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from
/// the second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
// sources have 4 halves, namely: C, D, E, F. The final values of A and
// B must come from C, D, E or F.
- int HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements()/2;
bool MatchA = false, MatchB = false;
// Check if A comes from one of C, D, E, F.
- for (int Half = 0; Half < 4; ++Half) {
+ for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
MatchA = true;
break;
@@ -3717,7 +3645,7 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
}
// Check if B comes from one of C, D, E, F.
- for (int Half = 0; Half < 4; ++Half) {
+ for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
MatchB = true;
break;
@@ -3727,22 +3655,21 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
- int HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements()/2;
- int FstHalf = 0, SndHalf = 0;
- for (int i = 0; i < HalfSize; ++i) {
+ unsigned FstHalf = 0, SndHalf = 0;
+ for (unsigned i = 0; i < HalfSize; ++i) {
if (SVOp->getMaskElt(i) > 0) {
FstHalf = SVOp->getMaskElt(i)/HalfSize;
break;
}
}
- for (int i = HalfSize; i < HalfSize*2; ++i) {
+ for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
if (SVOp->getMaskElt(i) > 0) {
SndHalf = SVOp->getMaskElt(i)/HalfSize;
break;
@@ -3752,141 +3679,56 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
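// Worked example (illustrative): for the v8i32 shuffle mask
// <4,5,6,7,12,13,14,15> (HalfSize == 4), the first destination half reads the
// upper half of V1 (FstHalf == 1) and the second half reads the upper half of
// V2 (SndHalf == 3), giving an immediate of 1 | (3 << 4) == 0x31.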
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the underlying
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ if (!HasAVX)
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
- return false;
- }
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether theunderlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
- }
-
- return true;
-}
-
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
- int LaneSize = NumElts/NumLanes;
-
- // Although the mask is equal for both lanes do it twice to get the cases
- // where a mask will match because the same mask element is undef on the
- // first half but valid on the second. This would get pathological cases
- // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
- unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned LaneSize = NumElts/NumLanes;
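+  // Every element must stay within its own 128-bit lane. For the 8 x 32-bit
+  // case, the high lane must also repeat the low lane's in-lane pattern.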
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (NumElts != 8 || l == 0)
continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
- }
-
- return Mask;
-}
-
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
+ // VPERMILPS handling
+ if (Mask[i] < 0)
continue;
- Mask |= (MaskElt-l*LaneSize) << i;
+ if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
+ return false;
}
+ }
- return Mask;
+ return true;
}
-/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
+/// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants. X86 movs{s|d} requires the lowest element to be the lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- int NumOps = VT.getVectorNumElements();
+ unsigned NumOps = VT.getVectorNumElements();
+ if (VT.getSizeInBits() == 256)
+ return false;
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
if (!isUndefOrEqual(Mask[0], 0))
return false;
- for (int i = 1; i < NumOps; ++i)
+ for (unsigned i = 1; i != NumOps; ++i)
if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
(V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
(V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
@@ -3895,26 +3737,14 @@ static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
- bool V2IsUndef = false) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
-}
-
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
- return false;
-
- // The second vector must be undef
- if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
return false;
- EVT VT = N->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
@@ -3922,9 +3752,9 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
return false;
// "i+1" is the value the indexed mask element must have
- for (unsigned i = 0; i < NumElems; i += 2)
- if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
- !isUndefOrEqual(N->getMaskElt(i+1), i+1))
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i+1) ||
+ !isUndefOrEqual(Mask[i+1], i+1))
return false;
return true;
@@ -3933,16 +3763,11 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
- return false;
-
- // The second vector must be undef
- if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
return false;
- EVT VT = N->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
@@ -3950,9 +3775,9 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
return false;
// "i" is the value the indexed mask element must have
- for (unsigned i = 0; i < NumElems; i += 2)
- if (!isUndefOrEqual(N->getMaskElt(i), i) ||
- !isUndefOrEqual(N->getMaskElt(i+1), i))
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i) ||
+ !isUndefOrEqual(Mask[i+1], i))
return false;
return true;
@@ -3961,21 +3786,17 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
- int NumElts = VT.getVectorNumElements();
- bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ unsigned NumElts = VT.getVectorNumElements();
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
- !V2IsUndef || NumElts != 4)
+ if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
return false;
- for (int i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ for (unsigned i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(Mask[i], 0))
return false;
- for (int i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ for (unsigned i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
@@ -3983,18 +3804,16 @@ static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
-bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
-
+static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
- int e = VT.getVectorNumElements() / 2;
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ unsigned e = VT.getVectorNumElements() / 2;
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[e+i], i))
return false;
return true;
}
@@ -4039,31 +3858,43 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
-unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+/// Handles 128-bit and 256-bit.
+static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for PSHUF/SHUFP");
- unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
+ // independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ assert((NumLaneElts == 2 || NumLaneElts == 4) &&
+ "Only supports 2 or 4 elements per lane");
+
+ unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
unsigned Mask = 0;
- for (int i = 0; i < NumOperands; ++i) {
- int Val = SVOp->getMaskElt(NumOperands-i-1);
- if (Val < 0) Val = 0;
- if (Val >= NumOperands) Val -= NumOperands;
- Mask |= Val;
- if (i != NumOperands - 1)
- Mask <<= Shift;
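+  // Each element contributes 1 bit (2 elements per lane) or 2 bits (4 elements
+  // per lane) to the immediate; shift amounts of 8 or more wrap around so both
+  // 128-bit lanes of a v8i32/v8f32 shuffle share the same 8-bit immediate.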
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt < 0) continue;
+ Elt %= NumLaneElts;
+ unsigned ShAmt = i << Shift;
+ if (ShAmt >= 8) ShAmt -= 8;
+ Mask |= Elt << ShAmt;
}
+
return Mask;
}
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
-unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
unsigned Mask = 0;
// 8 nodes, but we only care about the last 4.
for (unsigned i = 7; i >= 4; --i) {
- int Val = SVOp->getMaskElt(i);
+ int Val = N->getMaskElt(i);
if (Val >= 0)
Mask |= (Val - 4);
if (i != 4)
@@ -4074,12 +3905,11 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
-unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
unsigned Mask = 0;
// 8 nodes, but we only care about the first 4.
for (int i = 3; i >= 0; --i) {
- int Val = SVOp->getMaskElt(i);
+ int Val = N->getMaskElt(i);
if (Val >= 0)
Mask |= Val;
if (i != 0)
@@ -4090,18 +3920,24 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VVT = N->getValueType(0);
- unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
- int Val = 0;
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
- unsigned i, e;
- for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ int Val = 0;
+ unsigned i;
+ for (i = 0; i != NumElts; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
}
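+  // An index that refers to the second source is rebased so the byte rotation
+  // below is computed within a single 128-bit lane.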
+ if (Val >= (int)NumElts)
+ Val -= NumElts - NumLaneElts;
+
assert(Val - i > 0 && "PALIGNR imm should be positive");
return (Val - i) * EltSize;
}
@@ -4170,36 +4006,20 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SVOp->getOperand(0), &MaskVec[0]);
}
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
- EVT VT = Op->getValueType(0);
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ if (!isUndefOrEqual(Mask[i], i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ if (!isUndefOrEqual(Mask[i], i+4))
return false;
return true;
}
@@ -4218,14 +4038,36 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
return true;
}
+// Test whether the given value is a vector value which will be legalized
+// into a load.
+static bool WillBeConstantPoolLoad(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ // Check for any non-constant elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ switch (N->getOperand(i).getNode()->getOpcode()) {
+ case ISD::UNDEF:
+ case ISD::ConstantFP:
+ case ISD::Constant:
+ break;
+ default:
+ return false;
+ }
+
+ // Vectors of all-zeros and all-ones are materialized with special
+ // instructions rather than being loaded.
+ return !ISD::isBuildVectorAllZeros(N) &&
+ !ISD::isBuildVectorAllOnes(N);
+}
+
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ShuffleVectorSDNode *Op) {
- EVT VT = Op->getValueType(0);
+ ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
@@ -4233,7 +4075,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return false;
  // If V2 is a vector load, don't do this transformation. We will try to use
// load folding shufps op.
- if (ISD::isNON_EXTLoad(V2))
+ if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -4241,10 +4083,10 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ if (!isUndefOrEqual(Mask[i], i))
return false;
for (unsigned i = NumElems/2; i != NumElems; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ if (!isUndefOrEqual(Mask[i], i+NumElems))
return false;
return true;
}
@@ -4292,15 +4134,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
- DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 128) { // SSE
- if (HasXMMInt) { // SSE2
+ if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
@@ -4308,34 +4150,46 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.getSizeInBits() == 256) { // AVX
- // 256-bit logic and arithmetic instructions in AVX are
- // all floating-point, no support for integer ops. Default
- // to emitting fp zeroed vectors then.
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ if (Subtarget->hasAVX2()) { // AVX2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else {
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point, no support for integer ops. Emit fp zeroed vectors.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
-/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
-/// original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
+/// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
+/// Then bitcast to their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+ DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
assert((VT.is128BitVector() || VT.is256BitVector())
&& "Expected a 128-bit or 256-bit vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Cst, Cst, Cst, Cst);
-
- if (VT.is256BitVector()) {
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
- Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
- Vec = Insert128BitVector(InsV, Vec,
- DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ SDValue Vec;
+ if (VT.getSizeInBits() == 256) {
+ if (HasAVX2) { // AVX2
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else { // AVX
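+      // Without AVX2, build a 128-bit all-ones vector and insert it into both
+      // halves of a 256-bit register.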
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+ } else {
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
@@ -4343,24 +4197,12 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
-static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- bool Changed = false;
- SmallVector<int, 8> MaskVec;
- SVOp->getMask(MaskVec);
-
+static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
- if (MaskVec[i] > (int)NumElems) {
- MaskVec[i] = NumElems;
- Changed = true;
+ if (Mask[i] > (int)NumElems) {
+ Mask[i] = NumElems;
}
}
- if (Changed)
- return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
- SVOp->getOperand(1), &MaskVec[0]);
- return SDValue(SVOp, 0);
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
@@ -4464,7 +4306,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
if (Size == 256) {
- unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0;
V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
if (Idx > 0)
EltNo -= NumElems/2;
@@ -4496,11 +4338,12 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
- bool isZero, bool HasXMMInt,
+ bool IsZero,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT VT = V2.getValueType();
- SDValue V1 = isZero
- ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ SDValue V1 = IsZero
+ ? getZeroVector(VT, Subtarget, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
@@ -4509,9 +4352,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
+/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
+/// target specific opcode. Returns true if the Mask could be calculated.
+/// Sets IsUnary to true if it only uses one source.
+static bool getTargetShuffleMask(SDNode *N, EVT VT,
+ SmallVectorImpl<int> &Mask, bool &IsUnary) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue ImmN;
+
+ IsUnary = false;
+ switch(N->getOpcode()) {
+ case X86ISD::SHUFP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, Mask);
+ break;
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, Mask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, Mask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, Mask);
+ break;
+ case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+    // Index 0 always comes from the first element of the second source;
+    // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i) {
+ Mask.push_back(i);
+ }
+ break;
+ }
+ case X86ISD::VPERM2X128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ // Not yet implemented
+ return false;
+ default: llvm_unreachable("unknown target shuffle node");
+ }
+
+ return true;
+}
+
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -4522,129 +4437,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
- Index = SV->getMaskElt(Index);
+ int Elt = SV->getMaskElt(Index);
- if (Index < 0)
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- int NumElems = VT.getVectorNumElements();
- SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- int NumElems = VT.getVectorNumElements();
- SmallVector<unsigned, 16> ShuffleMask;
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
+ bool IsUnary;
- switch(Opcode) {
- case X86ISD::SHUFPS:
- case X86ISD::SHUFPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPSMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- DecodePUNPCKHMask(NumElems, ShuffleMask);
- break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- DecodeUNPCKHPMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(VT, ShuffleMask);
- break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- DecodeUNPCKLPMask(VT, ShuffleMask);
- break;
- case X86ISD::MOVHLPS:
- DecodeMOVHLPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::MOVLHPS:
- DecodeMOVLHPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PSHUFD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFHW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFLW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVSS:
- case X86ISD::MOVSD: {
- // The index 0 always comes from the first element of the second source,
- // this is why MOVSS and MOVSD are used in the first place. The other
- // elements come from the other positions of the first source vector.
- unsigned OpNum = (Index == 0) ? 1 : 0;
- return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
- Depth+1);
- }
- case X86ISD::VPERMILPS:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPSY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPDY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERM2F128:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVDDUP:
- case X86ISD::MOVLHPD:
- case X86ISD::MOVLPD:
- case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
- return SDValue(); // Not yet implemented.
- default:
- assert(0 && "unknown target shuffle node");
+ if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
return SDValue();
- }
- Index = ShuffleMask[Index];
- if (Index < 0)
+ int Elt = ShuffleMask[Index];
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
+ : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -4660,7 +4480,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
- : DAG.getUNDEF(VT.getVectorElementType());
+ : DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
@@ -4672,38 +4492,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// shuffle operation that are consecutively zero or undef. The
/// search can start in two different directions, from left or right.
static
-unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
- int i = 0;
-
- while (i < NumElems) {
+ unsigned i;
+ for (i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
- SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!(Elt.getNode() &&
(Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
break;
- ++i;
}
return i;
}
-/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
-/// MaskE correspond consecutively to elements from one of the vector operands,
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
/// starting from its index OpIdx. Also tell OpNum which source vector operand.
static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
- int OpIdx, int NumElems, unsigned &OpNum) {
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+ unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+ unsigned NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
int Idx = SVOp->getMaskElt(i);
    // Ignore undef indices
if (Idx < 0)
continue;
- if (Idx < NumElems)
+ if (Idx < (int)NumElems)
SeenV1 = true;
else
SeenV2 = true;
@@ -4738,7 +4557,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
0, // Mask Start Index
- NumElems-NumZeros-1, // Mask End Index
+                                NumElems-NumZeros,   // Mask End Index (exclusive)
NumZeros, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4771,7 +4590,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
NumZeros, // Mask Start Index
- NumElems-1, // Mask End Index
+                                NumElems,            // Mask End Index (exclusive)
0, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4804,6 +4623,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
@@ -4815,7 +4635,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -4851,6 +4671,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
@@ -4863,7 +4684,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -4884,7 +4705,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
const TargetLowering &TLI, DebugLoc dl) {
assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
- unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
@@ -4952,21 +4773,16 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
int EltNo = (Offset - StartOffset) >> 2;
int NumElems = VT.getVectorNumElements();
- EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
- false, false, 0);
+ false, false, false, 0);
- // Canonicalize it to a v4i32 or v8i32 shuffle.
SmallVector<int, 8> Mask;
for (int i = 0; i < NumElems; ++i)
Mask.push_back(EltNo);
- V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
- return DAG.getNode(ISD::BITCAST, dl, NVT,
- DAG.getVectorShuffle(CanonVT, dl, V1,
- DAG.getUNDEF(CanonVT),&Mask[0]));
+ return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
return SDValue();
@@ -5021,11 +4837,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
- LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->isInvariant(), 0);
return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
- LDBase->getAlignment());
+ LDBase->isInvariant(), LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1 &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
@@ -5041,6 +4858,137 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
return SDValue();
}
+/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
+/// to generate a splat value for the following cases:
+/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
+/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
+/// a scalar load, or a constant.
+/// The VBROADCAST node is returned when a pattern is found,
+/// or SDValue() otherwise.
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Ld;
+ bool ConstSplatVal;
+
+ switch (Op.getOpcode()) {
+ default:
+ // Unknown pattern found.
+ return SDValue();
+
+ case ISD::BUILD_VECTOR: {
+ // The BUILD_VECTOR node must be a splat.
+ if (!isSplatVector(Op.getNode()))
+ return SDValue();
+
+ Ld = Op.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The suspected load node has several users. Make sure that all
+ // of its users are from the BUILD_VECTOR node.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ return SDValue();
+ break;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Shuffles must have a splat mask where the first element is
+ // broadcasted.
+ if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
+ return SDValue();
+
+ SDValue Sc = Op.getOperand(0);
+ if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return SDValue();
+
+ Ld = Sc.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The scalar_to_vector node and the suspected
+ // load node must have exactly one user.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
+ return SDValue();
+ break;
+ }
+ }
+
+ bool Is256 = VT.getSizeInBits() == 256;
+ bool Is128 = VT.getSizeInBits() == 128;
+
+  // Handle broadcasting a single constant scalar from the constant pool
+ // into a vector. On Sandybridge it is still better to load a constant vector
+ // from the constant pool and not to broadcast it from a scalar.
+ if (ConstSplatVal && Subtarget->hasAVX2()) {
+ EVT CVT = Ld.getValueType();
+ assert(!CVT.isVector() && "Must not broadcast a vector type");
+ unsigned ScalarSize = CVT.getSizeInBits();
+
+ if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
+ (Is128 && (ScalarSize == 32))) {
+
+ const Constant *C = 0;
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
+ C = CI->getConstantIntValue();
+ else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
+ C = CF->getConstantFPValue();
+
+ assert(C && "Invalid constant type");
+
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+ }
+
+ // The scalar source must be a normal load.
+ if (!ISD::isNormalLoad(Ld.getNode()))
+ return SDValue();
+
+ // Reject loads that have uses of the chain result
+ if (Ld->hasAnyUseOfValue(1))
+ return SDValue();
+
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
+
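+  // These two cases map onto AVX's vbroadcastss (32-bit scalar into xmm or
+  // ymm) and vbroadcastsd (64-bit scalar into ymm); other sizes need the AVX2
+  // forms handled below.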
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 32))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+  // The integer check is needed for the 64-bit into 128-bit case, so it doesn't
+  // match double: there is no vbroadcastsd into an xmm register.
+ if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+
+ // Unsupported broadcast.
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5053,22 +5001,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 ||
- Op.getValueType() == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32)
return Op;
- return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
- // vectors or broken into v4i32 operations on 256-bit vectors.
+ // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
+ // vpcmpeqd on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (Op.getValueType() == MVT::v4i32)
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
return Op;
- return getOnesVector(Op.getValueType(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
}
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -5118,8 +5070,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasXMMInt(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -5132,7 +5083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -5141,21 +5092,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
- if (NumZero == 0) {
+ if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+
+ if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ if (VT.getSizeInBits() == 256) {
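+        // For a 256-bit result, insert the scalar into element 0 of an all-zero
+        // vector; the remaining elements are already zero.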
+ SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
+ Item, DAG.getIntPtrConstant(0));
+ }
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(),
- DAG);
- } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
+
+ if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- EVT MiddleVT = MVT::v4i32;
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasXMMInt(), DAG);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ if (VT.getSizeInBits() == 256) {
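+        // Zero-extend to 256 bits by placing the 128-bit value into the low
+        // half of an all-zero <8 x i32> vector.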
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ } else {
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -5183,8 +5146,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasXMMInt(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(i == Idx ? 0 : 1);
@@ -5214,9 +5176,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ if (VT.getSizeInBits() == 256) {
SmallVector<SDValue, 32> V;
- for (unsigned i = 0; i < NumElems; ++i)
+ for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
@@ -5240,8 +5202,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
- return getShuffleVectorZeroOrUndef(V2, Idx, true,
- Subtarget->hasXMMInt(), DAG);
+ return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
@@ -5249,24 +5210,23 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
- SmallVector<SDValue, 8> V;
- V.resize(NumElems);
+ SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
@@ -5289,13 +5249,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- SmallVector<int, 8> MaskVec;
- bool Reverse = (NonZeros & 0x3) == 2;
- for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i : i);
- Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
- for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ bool Reverse1 = (NonZeros & 0x3) == 2;
+ bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
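+    // Take the two low elements from V[0] and the two high elements from V[1],
+    // swapping within a pair when its nonzero element landed in the second slot.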
+ int MaskVec[] = {
+ Reverse1 ? 1 : 0,
+ Reverse1 ? 0 : 1,
+ static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems+1)
+ };
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
@@ -5310,7 +5271,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LD;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
+ if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -5422,6 +5383,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT InVT = V1.getValueType();
+ int MaskSize = VT.getVectorNumElements();
+ int InSize = InVT.getVectorNumElements();
+
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ if (MaskSize != InSize)
+ return SDValue();
+
+ int ISDNo = 0;
+ MVT OpTy;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i16:
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v8i16;
+ break;
+ case MVT::v4i32:
+ case MVT::v4f32:
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v4f32;
+ break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v2f64;
+ break;
+ case MVT::v8i32:
+ case MVT::v8f32:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v8f32;
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v4f64;
+ break;
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v16i16;
+ break;
+ }
+ assert(ISDNo && "Invalid Op Number");
+
+ unsigned MaskVals = 0;
+
+ for (int i = 0; i < MaskSize; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if (EltIdx == i || EltIdx == -1)
+ MaskVals |= (1<<i);
+ else if (EltIdx == (i + MaskSize))
+ continue; // Bit is set to zero;
+ else return SDValue();
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
+ SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
+ DAG.getConstant(MaskVals, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+}
+
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -5439,11 +5479,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// Determine if more than 1 of the words in each of the low and high quadwords
// of the result come from the same quadword of one of the two inputs. Undef
// mask values count as coming from any quadword, for better codegen.
- SmallVector<unsigned, 4> LoQuad(4);
- SmallVector<unsigned, 4> HiQuad(4);
- BitVector InputQuads(4);
+ unsigned LoQuad[] = { 0, 0, 0, 0 };
+ unsigned HiQuad[] = { 0, 0, 0, 0 };
+ std::bitset<4> InputQuads;
for (unsigned i = 0; i < 8; ++i) {
- SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ unsigned *Quad = i < 4 ? LoQuad : HiQuad;
int EltIdx = SVOp->getMaskElt(i);
MaskVals.push_back(EltIdx);
if (EltIdx < 0) {
@@ -5481,10 +5521,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
- BestLoQuad = InputQuads.find_first();
- BestHiQuad = InputQuads.find_next(BestLoQuad);
+ BestLoQuad = InputQuads[0] ? 0 : 1;
+ BestHiQuad = InputQuads[2] ? 2 : 3;
}
if (InputQuads.count() > 2) {
BestLoQuad = -1;
@@ -5497,9 +5537,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// words from all 4 input quadwords.
SDValue NewV;
if (BestLoQuad >= 0 || BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
- MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
- MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ int MaskV[] = {
+ BestLoQuad < 0 ? 0 : BestLoQuad,
+ BestHiQuad < 0 ? 1 : BestHiQuad
+ };
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
@@ -5544,8 +5585,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
unsigned TargetMask = 0;
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
- TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
- X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
+ TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
+ getShufflePSHUFLWImmediate(SVOp);
V1 = NewV.getOperand(0);
return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
@@ -5554,7 +5596,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -5602,61 +5644,51 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
// and update MaskVals with new element order.
- BitVector InOrder(8);
+ std::bitset<8> InOrder;
if (BestLoQuad >= 0) {
- SmallVector<int, 8> MaskV;
+ int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
for (int i = 0; i != 4; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
InOrder.set(i);
} else if ((idx / 4) == BestLoQuad) {
- MaskV.push_back(idx & 3);
+ MaskV[i] = idx & 3;
InOrder.set(i);
- } else {
- MaskV.push_back(-1);
}
}
- for (unsigned i = 4; i != 8; ++i)
- MaskV.push_back(i);
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
- NewV.getOperand(0),
- X86::getShufflePSHUFLWImmediate(NewV.getNode()),
- DAG);
+ NewV.getOperand(0),
+ getShufflePSHUFLWImmediate(SVOp), DAG);
+ }
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
- for (unsigned i = 0; i != 4; ++i)
- MaskV.push_back(i);
+ int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
for (unsigned i = 4; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
InOrder.set(i);
} else if ((idx / 4) == BestHiQuad) {
- MaskV.push_back((idx & 3) + 4);
+ MaskV[i] = (idx & 3) + 4;
InOrder.set(i);
- } else {
- MaskV.push_back(-1);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
- NewV.getOperand(0),
- X86::getShufflePSHUFHWImmediate(NewV.getNode()),
- DAG);
+ NewV.getOperand(0),
+ getShufflePSHUFHWImmediate(SVOp), DAG);
+ }
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -5698,8 +5730,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- SmallVector<int, 16> MaskVals;
- SVOp->getMask(MaskVals);
+ ArrayRef<int> MaskVals = SVOp->getMask();
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
@@ -5718,7 +5749,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
+ if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -5849,7 +5880,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Unexpected!");
+ default: llvm_unreachable("Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
@@ -5915,96 +5946,89 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
OpVT, SrcOp)));
}
-/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
-/// shuffle node referes to only one lane in the sources.
-static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- int HalfSize = NumElems/2;
- SmallVector<int, 16> M;
- SVOp->getMask(M);
- bool MatchA = false, MatchB = false;
-
- for (int l = 0; l < NumElems*2; l += HalfSize) {
- if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
- MatchA = true;
- break;
- }
- }
-
- for (int l = 0; l < NumElems*2; l += HalfSize) {
- if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
- MatchB = true;
- break;
- }
- }
-
- return MatchA && MatchB;
-}
-
/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
/// which could not be matched by any known target specific shuffle
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
- // If each half of a vector shuffle node referes to only one lane in the
- // source vectors, extract each used 128-bit lane and shuffle them using
- // 128-bit shuffles. Then, concatenate the results. Otherwise leave
- // the work to the legalizer.
- DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- int HalfSize = NumElems/2;
-
- // Extract the reference for each half
- int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
- int FstVecOpNum = 0, SndVecOpNum = 0;
- for (int i = 0; i < HalfSize; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (SVOp->getMaskElt(i) < 0)
- continue;
- FstVecOpNum = Elt/NumElems;
- FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
- break;
- }
- for (int i = HalfSize; i < NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (SVOp->getMaskElt(i) < 0)
+ EVT VT = SVOp->getValueType(0);
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLaneElems = NumElems / 2;
+
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ SDValue Shufs[2];
+
+ SmallVector<int, 16> Mask;
+ for (unsigned l = 0; l < 2; ++l) {
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out by returning SDValue().
+ int InputUsed[2] = { -1, -1 }; // Not yet discovered.
+ unsigned LaneStart = l * NumLaneElems;
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
+ int Idx = SVOp->getMaskElt(i+LaneStart);
+ if (Idx < 0) {
+ // the mask element does not index into any input vector.
+ Mask.push_back(-1);
continue;
- SndVecOpNum = Elt/NumElems;
- SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
- break;
- }
+ }
- // Extract the subvectors
- SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
- DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
- SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
- DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumLaneElems;
- // Generate 128-bit shuffles
- SmallVector<int, 16> MaskV1, MaskV2;
- for (int i = 0; i < HalfSize; ++i) {
- int Elt = SVOp->getMaskElt(i);
- MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
- }
- for (int i = HalfSize; i < NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumLaneElems;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input)
+ // This input vector is already an operand.
+ break;
+ if (InputUsed[OpNo] < 0) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up.
+ return SDValue();
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Mask.push_back(Idx + OpNo * NumLaneElems);
}
- EVT NVT = V1.getValueType();
- V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
- V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+ if (InputUsed[0] < 0) {
+ // No input vectors were used! The result is undefined.
+ Shufs[l] = DAG.getUNDEF(NVT);
+ } else {
+ SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
+ DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
+ Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
+ DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // At least one input vector was used. Create a new shuffle vector.
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
+ }
- // Concatenate the result back
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
- DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ Mask.clear();
}
- return SDValue();
+ // Concatenate the result back
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
+ DAG, dl);
}
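
For reference, a minimal standalone C++ sketch of what the per-lane rewriting above computes (illustrative only; the 8-element mask, the lane numbering and the printed output are made-up test data, not LLVM code):

#include <cstdio>
#include <vector>

int main() {
  const int NumElems = 8, NumLaneElems = NumElems / 2;
  // Mask entries index the concatenation of two 8-wide sources A and B
  // (0..7 from A, 8..15 from B); -1 means undef.
  int Mask[NumElems] = { 0, 1, 9, 8, 6, 7, 14, 15 };
  for (int l = 0; l < 2; ++l) {
    int InputUsed[2] = { -1, -1 };     // which 128-bit source lanes feed this output lane
    std::vector<int> LaneMask;
    bool GiveUp = false;
    for (int i = 0; i < NumLaneElems && !GiveUp; ++i) {
      int Idx = Mask[l * NumLaneElems + i];
      if (Idx < 0) { LaneMask.push_back(-1); continue; }
      int Input = Idx / NumLaneElems;  // source lane: 0 A-lo, 1 A-hi, 2 B-lo, 3 B-hi
      Idx -= Input * NumLaneElems;     // offset within that source lane
      int OpNo = 0;
      for (; OpNo < 2; ++OpNo) {
        if (InputUsed[OpNo] == Input) break;                      // already an operand
        if (InputUsed[OpNo] < 0) { InputUsed[OpNo] = Input; break; }
      }
      if (OpNo == 2) { GiveUp = true; break; }                    // more than two source lanes
      LaneMask.push_back(Idx + OpNo * NumLaneElems);
    }
    std::printf("output lane %d: inputs {%d,%d}, mask {", l, InputUsed[0], InputUsed[1]);
    for (int m : LaneMask) std::printf(" %d", m);
    std::printf(" }\n");
  }
  return 0;
}

For this mask the sketch reports lane 0 as a 4-element shuffle of A's low and B's low lane, and lane 1 as a shuffle of A's high and B's high lane; the lowering above then extracts those lanes with Extract128BitVector, emits one 128-bit shuffle per output lane, and concatenates the two results.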
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
@@ -6018,11 +6042,9 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
- SmallVector<std::pair<int, int>, 8> Locs;
- Locs.resize(4);
- SmallVector<int, 8> Mask1(4U, -1);
- SmallVector<int, 8> PermMask;
- SVOp->getMask(PermMask);
+ std::pair<int, int> Locs[4];
+ int Mask1[] = { -1, -1, -1, -1 };
+ SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
unsigned NumHi = 0;
unsigned NumLo = 0;
@@ -6052,17 +6074,14 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// vector operands, put the elements into the right order.
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
- SmallVector<int, 8> Mask2(4U, -1);
+ int Mask2[] = { -1, -1, -1, -1 };
- for (unsigned i = 0; i != 4; ++i) {
- if (Locs[i].first == -1)
- continue;
- else {
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1) {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = Idx;
}
- }
return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
} else if (NumLo == 3 || NumHi == 3) {
@@ -6075,7 +6094,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
@@ -6115,18 +6134,16 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
- Locs.clear();
- Locs.resize(4);
- SmallVector<int,8> LoMask(4U, -1);
- SmallVector<int,8> HiMask(4U, -1);
+ int LoMask[] = { -1, -1, -1, -1 };
+ int HiMask[] = { -1, -1, -1, -1 };
- SmallVector<int,8> *MaskPtr = &LoMask;
+ int *MaskPtr = LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
- MaskPtr = &HiMask;
+ MaskPtr = HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
@@ -6136,26 +6153,21 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
Locs[i] = std::make_pair(-1, -1);
} else if (Idx < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
- (*MaskPtr)[LoIdx] = Idx;
+ MaskPtr[LoIdx] = Idx;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
- (*MaskPtr)[HiIdx] = Idx;
+ MaskPtr[HiIdx] = Idx;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
- SmallVector<int, 8> MaskOps;
- for (unsigned i = 0; i != 4; ++i) {
- if (Locs[i].first == -1) {
- MaskOps.push_back(-1);
- } else {
- unsigned Idx = Locs[i].first * 4 + Locs[i].second;
- MaskOps.push_back(Idx);
- }
- }
+ int MaskOps[] = { -1, -1, -1, -1 };
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1)
+ MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
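
A standalone C++ sketch of the final "break it into (shuffle shuffle_hi, shuffle_lo)" fallback above, replayed on plain arrays (illustrative only; the example mask and values are invented, not LLVM code). Entries 0-3 of a mask select from A, 4-7 from B, -1 means undef:

#include <array>
#include <cstdio>

using V4 = std::array<int, 4>;

static V4 shuffle(const V4 &A, const V4 &B, const int (&M)[4]) {
  V4 R{};
  for (int i = 0; i != 4; ++i)
    R[i] = M[i] < 0 ? 0 : (M[i] < 4 ? A[M[i]] : B[M[i] - 4]);
  return R;
}

int main() {
  V4 A = {10, 11, 12, 13}, B = {20, 21, 22, 23};
  int Perm[4]   = { 5,  0,  3,  6 };               // mixes both inputs in both halves
  int LoMask[4] = {-1, -1, -1, -1};
  int HiMask[4] = {-1, -1, -1, -1};
  int Final[4]  = {-1, -1, -1, -1};
  int LoIdx = 0, HiIdx = 2;
  for (int i = 0; i != 4; ++i) {
    int *M = i < 2 ? LoMask : HiMask;              // result 0-1 from LoShuffle, 2-3 from HiShuffle
    int Base = i < 2 ? 0 : 4;
    if (i == 2) { LoIdx = 0; HiIdx = 2; }          // reset, as in the loop above
    if (Perm[i] < 0) continue;
    if (Perm[i] < 4) { M[LoIdx] = Perm[i]; Final[i] = Base + LoIdx++; }  // element from A
    else             { M[HiIdx] = Perm[i]; Final[i] = Base + HiIdx++; }  // element from B
  }
  V4 Lo = shuffle(A, B, LoMask), Hi = shuffle(A, B, HiMask);
  V4 R  = shuffle(Lo, Hi, Final);
  for (int v : R) std::printf("%d ", v);           // prints: 21 10 13 22
  std::printf("\n");
  return 0;
}

Each sub-mask ends up with its A-sourced elements in positions 0-1 and its B-sourced elements in positions 2-3, which is what makes LoShuffle and HiShuffle individually matchable as SHUFPS.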
@@ -6164,6 +6176,10 @@ static bool MayFoldVectorLoad(SDValue V) {
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
+ V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
+ // BUILD_VECTOR (load), undef
+ V = V.getOperand(0);
if (MayFoldLoad(V))
return true;
return false;
@@ -6186,82 +6202,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
return false;
}
-/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
-/// a vector extract, and if both can be later optimized into a single load.
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
-/// here because otherwise a target specific shuffle node is going to be
-/// emitted for this shuffle, and the optimization not done.
-/// FIXME: This is probably not the best approach, but fix the problem
-/// until the right path is decided.
-static
-bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = V.getValueType();
- ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
-
- // Be sure that the vector shuffle is present in a pattern like this:
- // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
- if (!V.hasOneUse())
- return false;
-
- SDNode *N = *V.getNode()->use_begin();
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return false;
-
- SDValue EltNo = N->getOperand(1);
- if (!isa<ConstantSDNode>(EltNo))
- return false;
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- bool HasShuffleIntoBitcast = false;
- if (V.getOpcode() == ISD::BITCAST) {
- EVT SrcVT = V.getOperand(0).getValueType();
- if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
- return false;
- V = V.getOperand(0);
- HasShuffleIntoBitcast = true;
- }
-
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
- V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
-
- // Skip one more bit_convert if necessary
- if (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (ISD::isNormalLoad(V.getNode())) {
- // Is the original load suitable?
- LoadSDNode *LN0 = cast<LoadSDNode>(V);
-
- // FIXME: avoid the multi-use bug that is preventing lots of
- // of foldings to be detected, this is still wrong of course, but
- // give the temporary desired behavior, and if it happens that
- // the load has real more uses, during isel it will not fold, and
- // will generate poor code.
- if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
- return false;
-
- if (!HasShuffleIntoBitcast)
- return true;
-
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return false;
- }
-
- return true;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6275,14 +6215,14 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
- bool HasXMMInt) {
+ bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
- if (HasXMMInt && VT == MVT::v2f64)
+ if (HasSSE2 && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
// v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
@@ -6308,24 +6248,8 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
-static inline unsigned getSHUFPOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4i32: // Use fp unit for int unpack.
- case MVT::v4f32: return X86ISD::SHUFPS;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2i64: // Use fp unit for int unpack.
- case MVT::v2f64: return X86ISD::SHUFPD;
- default:
- llvm_unreachable("Unknown type for shufp*");
- }
- return 0;
-}
-
static
-SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
@@ -6346,32 +6270,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
// turns into:
// (MOVLPSmr addr:$src1, VR128:$src2)
// So, recognize this potential and also use MOVLPS or MOVLPD
- if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
- // Both of them can't be memory operations though.
- if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
- CanFoldLoad = false;
-
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
if (CanFoldLoad) {
- if (HasXMMInt && NumElems == 2)
+ if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ // If we don't care about the second element, proceed to use movss.
+ if (SVOp->getMaskElt(1) != -1)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
}
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// movl and movlp will both match v2i64, but v2i64 is never matched by
// movl earlier because we make it strict to avoid messing with the movlp load
// folding logic (see the code above getMOVLP call). Match it here then,
// this is horrible, but will stay like this until we move all shuffle
// matching to x86 specific nodes. Note that for the 1st condition all
// types are matched with movsd.
- if (HasXMMInt) {
+ if (HasSSE2) {
// FIXME: isMOVLMask should be checked and matched before getMOVLP,
// as to remove this logic from here, as much as possible
- if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+ if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
}
@@ -6379,112 +6301,12 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
assert(VT != MVT::v4i32 && "unsupported shuffle type");
// Invert the operand order and use SHUFPS to match it.
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1,
- X86::getShuffleSHUFImmediate(SVOp), DAG);
-}
-
-static inline unsigned getUNPCKLOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKLPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKLPDY;
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
- default:
- llvm_unreachable("Unknown type for unpckl");
- }
- return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKHPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKHPDY;
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
- default:
- llvm_unreachable("Unknown type for unpckh");
- }
- return 0;
-}
-
-static inline unsigned getVPERMILOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32:
- case MVT::v4f32: return X86ISD::VPERMILPS;
- case MVT::v2i64:
- case MVT::v2f64: return X86ISD::VPERMILPD;
- case MVT::v8i32:
- case MVT::v8f32: return X86ISD::VPERMILPSY;
- case MVT::v4i64:
- case MVT::v4f64: return X86ISD::VPERMILPDY;
- default:
- llvm_unreachable("Unknown type for vpermil");
- }
- return 0;
-}
-
-/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
-/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
-/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
-static bool isVectorBroadcast(SDValue &Op) {
- EVT VT = Op.getValueType();
- bool Is256 = VT.getSizeInBits() == 256;
-
- assert((VT.getSizeInBits() == 128 || Is256) &&
- "Unsupported type for vbroadcast node");
-
- SDValue V = Op;
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (Is256 && !(V.hasOneUse() &&
- V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(0).getOpcode() == ISD::UNDEF))
- return false;
-
- if (Is256)
- V = V.getOperand(1);
-
- if (!V.hasOneUse())
- return false;
-
- // Check the source scalar_to_vector type. 256-bit broadcasts are
- // supported for 32/64-bit sizes, while 128-bit ones are only supported
- // for 32-bit scalars.
- if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
- return false;
-
- unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
- if (ScalarSize != 32 && ScalarSize != 64)
- return false;
- if (!Is256 && ScalarSize == 64)
- return false;
-
- V = V.getOperand(0);
- if (!MayFoldLoad(V))
- return false;
-
- // Return the load node
- Op = V;
- return true;
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
}
-static
-SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI,
- const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -6492,22 +6314,17 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
unsigned NumElem = VT.getVectorNumElements();
int Size = VT.getSizeInBits();
- // Special case, this is the only place now where it's allowed to return
- // a vector_shuffle operation without using a target specific node, because
- // *hopefully* it will be optimized away by the dag combiner. FIXME: should
- // this be moved to DAGCombine instead?
- if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
- return Op;
// Use vbroadcast whenever the splat comes from a foldable load
- if (Subtarget->hasAVX() && isVectorBroadcast(V1))
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
@@ -6525,21 +6342,26 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
- (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
+ (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode()) {
- if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
- return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ EVT NewVT = NewOp.getValueType();
+ if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
+ NewVT, true, false))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
- if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
- return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
- DAG, Subtarget, dl);
+ if (NewOp.getNode()) {
+ EVT NewVT = NewOp.getValueType();
+ if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
}
}
return SDValue();
@@ -6553,18 +6375,22 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
- bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
- // Shuffle operations on MMX not supported.
- if (isMMX)
- return Op;
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+
+ if (V1IsUndef && V2IsUndef)
+ return DAG.getUNDEF(VT);
+
+ assert(!V1IsUndef && "Op 1 of shuffle should not be undef");
// Vector shuffle lowering takes 3 steps:
//
@@ -6576,50 +6402,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// so the shuffle can be broken into other shuffles and the legalizer can
// try the lowering again.
//
- // The general ideia is that no vector_shuffle operation should be left to
+ // The general idea is that no vector_shuffle operation should be left to
// be matched during isel, all of them must be converted to a target specific
// node here.
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
if (NewOp.getNode())
return NewOp;
+ SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
+
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
- if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
- if (X86::isMOVDDUPMask(SVOp) &&
- (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
- if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
// Use to match splats
- if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
- if (X86::isPSHUFDMask(SVOp)) {
+ if (isPSHUFDMask(M, VT)) {
// The actual implementation will match the mask in the if above and then
// during isel it can match several different instructions, not only pshufd
// as its name says, sad but true, emulate the behavior for now...
- if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
- return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+ if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
- unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
- if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1,
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
@@ -6627,8 +6457,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasXMMInt() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -6637,13 +6466,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
- if (X86::isMOVLMask(SVOp)) {
- if (V1IsUndef)
- return V2;
+ if (isMOVLMask(M, VT)) {
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!X86::isMOVLPMask(SVOp)) {
- if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ if (!isMOVLPMask(M, VT)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
@@ -6652,27 +6479,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
- if (X86::isMOVHLPSMask(SVOp))
+ if (isMOVHLPSMask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
- if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasXMMInt);
+ if (isMOVLPMask(M, VT))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
- if (ShouldXformToMOVHLPS(SVOp) ||
- ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ if (ShouldXformToMOVHLPS(M, VT) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
return CommuteVectorShuffle(SVOp, DAG);
if (isShift) {
- // No better options. Use a vshl / vsrl.
+ // No better options. Use a vshldq / vsrldq.
EVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
@@ -6685,17 +6512,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
- if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
- Op = CommuteVectorShuffle(SVOp, DAG);
- SVOp = cast<ShuffleVectorSDNode>(Op);
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
+ if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
- std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
return V1;
@@ -6705,81 +6529,77 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 point to its first
// element then try to match unpck{h|l} again. If match, return a
- // new vector_shuffle with the corrected mask.
- SDValue NewMask = NormalizeMask(SVOp, DAG);
- ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
- if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, true)) {
- return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, true)) {
- return NewMask;
- }
+ // new vector_shuffle with the corrected mask.
+ SmallVector<int, 8> NewMask(M.begin(), M.end());
+ NormalizeMask(NewMask, NumElems);
+ if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+ } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
}
if (Commuted) {
// Commute is back and try unpck* again.
// FIXME: this seems wrong.
- SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
- ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = false;
- if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
- SmallVector<int, 16> M;
- SVOp->getMask(M);
- if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
- X86::getShufflePALIGNRImmediate(SVOp),
+ getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
- if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
- X86::getShufflePSHUFHWImmediate(SVOp),
+ getShufflePSHUFHWImmediate(SVOp),
DAG);
if (isPSHUFLWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
- X86::getShufflePSHUFLWImmediate(SVOp),
+ getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- X86::getShuffleSHUFImmediate(SVOp), DAG);
+ if (isSHUFPMask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
+ getShuffleSHUFImmediate(SVOp), DAG);
- if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6787,33 +6607,26 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (isMOVDDUPYMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
-
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasAVX)) {
+ if (HasAVX2 && VT == MVT::v8i32)
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ }
- // Handle VSHUFPSY permutations
- if (isVSHUFPSYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPSYImmediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPDY permutations
- if (isVSHUFPDYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPDYImmediate(SVOp), DAG);
+ SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG);
+ if (BlendOp.getNode())
+ return BlendOp;
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -6896,8 +6709,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
- } else if (VT == MVT::i32) {
- // ExtractPS works with constant index.
+ } else if (VT == MVT::i32 || VT == MVT::i64) {
+ // ExtractPS/pextrq works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
@@ -6933,7 +6746,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
@@ -7036,7 +6849,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) &&
+ isa<ConstantSDNode>(N2)) {
// PINSR* works with constant index.
return Op;
}
@@ -7074,7 +6888,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
@@ -7276,7 +7090,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
@@ -7344,7 +7158,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -7423,7 +7237,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr), false, false, 0);
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -7449,7 +7264,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -7471,8 +7286,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
- TLSModel::Model model
- = getTLSModel(GV, getTargetMachine().getRelocationModel());
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -7529,19 +7343,77 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
Chain.getValue(1));
- }
+ } else if (Subtarget->isTargetWindows()) {
+ // Just use the implicit TLS architecture
+ // Need to generate something similar to:
+ // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
+ // ; from TEB
+ // mov ecx, dword [rel _tls_index]; Load index (from C runtime)
+ // mov rcx, qword [rdx+rcx*8]
+ // mov eax, .tls$:tlsvar
+ // [rax+rcx] contains the address
+ // Windows 64bit: gs:0x58
+ // Windows 32bit: fs:__tls_array
- assert(false &&
- "TLS not implemented for this target.");
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Chain = DAG.getEntryNode();
- llvm_unreachable("Unreachable");
- return SDValue();
+ // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
+ // %gs:0x58 (64-bit).
+ Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
+ ? Type::getInt8PtrTy(*DAG.getContext(),
+ 256)
+ : Type::getInt32PtrTy(*DAG.getContext(),
+ 257));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
+ Subtarget->is64Bit()
+ ? DAG.getIntPtrConstant(0x58)
+ : DAG.getExternalSymbol("_tls_array",
+ getPointerTy()),
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
+
+ // Load the _tls_index variable
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ if (Subtarget->is64Bit())
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
+ IDX, MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ else
+ IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
+ false, false, false, 0);
+
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+ getPointerTy());
+ IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+
+ SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Get the offset of start of .tls section
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), X86II::MO_SECREL);
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
+ }
+
+ llvm_unreachable("TLS not implemented for this target.");
}
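
As a rough illustration (not part of the patch), the address arithmetic that the Windows branch above emits corresponds to the following C++, where the parameters stand in for its three loads: tlsArray for the pointer read from gs:0x58 (64-bit) or fs:__tls_array (32-bit), tlsIndex for the _tls_index load, and secRelOffset for the variable's SECREL offset in .tls$:

#include <cstddef>

char *implicitTLSAddress(char **tlsArray, unsigned tlsIndex,
                         std::size_t secRelOffset) {
  char *moduleTLSBase = tlsArray[tlsIndex]; // mov rcx, qword [rdx+rcx*8]
  return moduleTLSBase + secRelOffset;      // [rax+rcx] is the variable's address
}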
-/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
-/// take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
+/// and take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -7673,7 +7545,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Op.getValueType(), MMO);
Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ false, false, false, 0);
}
return Result;
@@ -7682,85 +7554,65 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
- // This algorithm is not obvious. Here it is in C code, more or less:
+ // This algorithm is not obvious. Here is what we're trying to output:
/*
- double uint64_to_double( uint32_t hi, uint32_t lo ) {
- static const __m128i exp = { 0x4330000045300000ULL, 0 };
- static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
-
- // Copy ints to xmm registers.
- __m128i xh = _mm_cvtsi32_si128( hi );
- __m128i xl = _mm_cvtsi32_si128( lo );
-
- // Combine into low half of a single xmm register.
- __m128i x = _mm_unpacklo_epi32( xh, xl );
- __m128d d;
- double sd;
-
- // Merge in appropriate exponents to give the integer bits the right
- // magnitude.
- x = _mm_unpacklo_epi32( x, exp );
-
- // Subtract away the biases to deal with the IEEE-754 double precision
- // implicit 1.
- d = _mm_sub_pd( (__m128d) x, bias );
-
- // All conversions up to here are exact. The correctly rounded result is
- // calculated using the current rounding mode using the following
- // horizontal add.
- d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
- _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
- // store doesn't really need to be here (except
- // maybe to zero the other double)
- return sd;
- }
+ movq %rax, %xmm0
+ punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
+ subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
+ #ifdef __SSE3__
+ haddpd %xmm0, %xmm0
+ #else
+ pshufd $0x4e, %xmm0, %xmm1
+ addpd %xmm1, %xmm0
+ #endif
*/
DebugLoc dl = Op.getDebugLoc();
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- std::vector<Constant*> CV0;
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- Constant *C0 = ConstantVector::get(CV0);
+ const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
- std::vector<Constant*> CV1;
+ SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
- SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(1)));
- SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(0)));
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ // Load the 64-bit value into an XMM register.
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Op.getOperand(0));
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
- SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
- SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
+ false, false, false, 16);
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
+ CLod0);
+
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ SDValue Result;
+
+ if (Subtarget->hasSSE3()) {
+ // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
+ } else {
+ SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
+ SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
+ S2F, 0x4E, DAG);
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
+ Sub);
+ }
- // Add the halves; easiest way is to swap them into another reg first.
- int ShufMask[2] = { 1, -1 };
- SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
- DAG.getUNDEF(MVT::v2f64), ShufMask);
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0));
}
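
The same magic-constant trick, written as scalar C++ for reference (a sketch, not LLVM code; it assumes C++17 hexadecimal floating-point literals). 0x43300000... is 2^52 and 0x45300000... is 2^84, matching the CV0/CV1 constants built above:

#include <cstdint>
#include <cstring>

double uint64ToDouble(uint64_t x) {
  uint64_t loBits = 0x4330000000000000ULL | (x & 0xFFFFFFFFULL); // 2^52 + lo
  uint64_t hiBits = 0x4530000000000000ULL | (x >> 32);           // 2^84 + hi * 2^32
  double lo, hi;
  std::memcpy(&lo, &loBits, sizeof lo);
  std::memcpy(&hi, &hiBits, sizeof hi);
  // Subtracting the biases leaves exactly lo and hi * 2^32; the final add is
  // the only step that can round, matching the haddpd/addpd in the comment.
  return (lo - 0x1.0p52) + (hi - 0x1.0p84);
}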
@@ -7777,8 +7629,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
Op.getOperand(0));
// Zero out the upper parts of the register.
- Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
- DAG);
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -7830,6 +7681,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return LowerUINT_TO_FP_i64(Op, DAG);
else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
+ else if (Subtarget->is64Bit() &&
+ SrcVT == MVT::i64 && DstVT == MVT::f32)
+ return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
@@ -7849,7 +7703,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, MachinePointerInfo(),
+ StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
@@ -7897,19 +7751,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
- if (!IsSigned) {
+ if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
- "Unknown FP_TO_SINT to lower!");
+ "Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
@@ -7920,26 +7774,29 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
- // stack slot.
+ // We lower FP->int64 either into FISTP64 followed by a load from a temporary
+ // stack slot, or into the FTOL runtime function.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
-
unsigned Opc;
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ if (!IsSigned && isIntegerTypeFTOL(DstTy))
+ Opc = X86ISD::WIN_FTOL;
+ else
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
+ // FIXME This causes a redundant load/store if the SSE-class value is already
+ // in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
@@ -7964,12 +7821,26 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
- // Build the FP_TO_INT*_IN_MEM
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, 3, DstTy, MMO);
-
- return std::make_pair(FIST, StackSlot);
+ if (Opc != X86ISD::WIN_FTOL) {
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+ return std::make_pair(FIST, StackSlot);
+ } else {
+ SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, Value);
+ SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
+ MVT::i32, ftol.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
+ MVT::i32, eax.getValue(2));
+ SDValue Ops[] = { eax, edx };
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
+ : DAG.getMergeValues(Ops, 2, DL);
+ return std::make_pair(pair, SDValue());
+ }
}
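
For reference (a sketch, not LLVM code), the value the BUILD_PAIR/merge above reconstructs from the FTOL calling convention is simply the 64-bit result split across EDX:EAX:

#include <cstdint>

uint64_t pairFromFTOL(uint32_t eax, uint32_t edx) {
  return (uint64_t(edx) << 32) | eax;   // BUILD_PAIR(eax, edx) as an i64
}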
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
@@ -7977,25 +7848,37 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
if (Op.getValueType().isVector())
return SDValue();
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (FIST.getNode() == 0) return Op;
- // Load the result.
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+ else
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
SelectionDAG &DAG) const {
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");
- // Load the result.
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+ else
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -8006,23 +7889,18 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ Constant *C;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
- CV.push_back(C);
- CV.push_back(C);
+ C = ConstantVector::getSplat(2,
+ ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
} else {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ C = ConstantVector::getSplat(4,
+ ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
}
- Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -8031,31 +7909,28 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
- if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
- CV.push_back(C);
- CV.push_back(C);
- } else {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ NumElts = VT.getVectorNumElements();
}
- Constant *C = ConstantVector::get(CV);
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ else
+ C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
if (VT.isVector()) {
+ MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
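
Both lowerings rely on the same sign-bit trick, shown here in scalar form for reference (a sketch, not LLVM code): FABS ANDs with a splat of ~(1 << 63) to clear the sign bit, FNEG XORs with a splat of (1 << 63) to flip it.

#include <cstdint>
#include <cstring>

double fabsViaMask(double x) {
  uint64_t b;
  std::memcpy(&b, &x, sizeof b);
  b &= ~(1ULL << 63);                // clear the sign bit (FAND with the constant-pool mask)
  std::memcpy(&x, &b, sizeof x);
  return x;
}

double fnegViaMask(double x) {
  uint64_t b;
  std::memcpy(&b, &x, sizeof b);
  b ^= 1ULL << 63;                   // flip the sign bit (FXOR / vector XOR with the splat)
  std::memcpy(&x, &b, sizeof x);
  return x;
}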
@@ -8084,7 +7959,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
@@ -8098,7 +7973,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
@@ -8127,7 +8002,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
// Or the value with the sign bit.
@@ -8191,8 +8066,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg &&
+ UI->getOpcode() != ISD::SETCC &&
+ UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
@@ -8325,8 +8202,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
- DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ APInt Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
@@ -8335,11 +8212,19 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+
+ if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
+
+ // Use BT if the immediate can't be encoded in a TEST instruction.
+ if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
+ LHS = AndLHS;
+ RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
+ }
}
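The new power-of-two check exists because TEST can only encode a 32-bit immediate, so a single-bit mask at bit 32 or above (for example 1ULL << 40) is not representable there; BT instead tests the bit by its index. A scalar sketch of the equivalence, assuming the mask is a power of two (the helper name is illustrative):

    #include <cstdint>
    static bool test_single_high_bit(uint64_t v, uint64_t mask) {
      unsigned idx = 0;
      while ((1ULL << idx) != mask) ++idx;   // index of the single set bit (Log2 of the mask)
      return (v >> idx) & 1u;                // what "bt v, idx" leaves in CF
    }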
if (LHS.getNode()) {
@@ -8466,9 +8351,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (isFP) {
unsigned SSECC = 8;
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
- unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
// SSE Condition code mapping:
@@ -8508,61 +8392,57 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (SSECC == 8) {
if (SetCCOpcode == ISD::SETUEQ) {
SDValue UNORD, EQ;
- UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(0, MVT::i8));
return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- else if (SetCCOpcode == ISD::SETONE) {
+ } else if (SetCCOpcode == ISD::SETONE) {
SDValue ORD, NEQ;
- ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
}
llvm_unreachable("Illegal FP comparison");
}
// Handle all other FP comparisons here.
- return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
}
// Break 256-bit integer vector compare into smaller ones.
- if (!isFP && VT.getSizeInBits() == 256)
+ if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ unsigned Opc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
- switch (VT.getSimpleVT().SimpleTy) {
- default: break;
- case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
- case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
- }
-
switch (SetCCOpcode) {
default: break;
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
}
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
return SDValue();
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -8679,8 +8559,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC ||
- Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -8697,6 +8578,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = Cmp;
addTest = false;
}
+ } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
}
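The condition codes chosen above follow the usual x86 flag convention: unsigned add/sub overflow is reported in CF (hence X86::COND_B) and signed overflow in OF (hence X86::COND_O), with both multiply variants reporting through OF. A scalar illustration of the two add predicates (not taken from the patch):

    #include <cstdint>
    static bool uaddo32(uint32_t a, uint32_t b, uint32_t &sum) {
      sum = a + b;
      return sum < a;                        // carry out -> CF -> X86::COND_B
    }
    static bool saddo32(int32_t a, int32_t b, int32_t &sum) {
      sum = (int32_t)((uint32_t)a + (uint32_t)b);
      return ((a ^ sum) & (b ^ sum)) < 0;    // signed overflow -> OF -> X86::COND_O
    }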
if (addTest) {
@@ -8778,11 +8692,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
SDValue CC;
+ bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
- SDValue NewCond = LowerSETCC(Cond, DAG);
- if (NewCond.getNode())
- Cond = NewCond;
+ // Check for setcc([su]{add,sub,mul}o == 0).
+ if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
+ isa<ConstantSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
+ Cond.getOperand(0).getResNo() == 1 &&
+ (Cond.getOperand(0).getOpcode() == ISD::SADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::UADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::USUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::SMULO ||
+ Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
+ Inverted = true;
+ Cond = Cond.getOperand(0);
+ } else {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
}
#if 0
// FIXME: LowerXALUO doesn't handle these!!
@@ -8803,8 +8733,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC ||
- Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -8825,6 +8756,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
break;
}
}
+ }
+ CondOpcode = Cond.getOpcode();
+ if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
@@ -8888,6 +8856,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
+ // For FCMP_OEQ, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
+ // For FCMP_UNE, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_UNE.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_NP, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ Dest = FalseBB;
+ }
+ }
}
}
@@ -8926,7 +8954,7 @@ SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
- EnableSegmentedStacks) &&
+ getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
@@ -8940,7 +8968,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (EnableSegmentedStacks) {
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -9076,10 +9104,10 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!UseSoftFloat &&
+ assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
- Subtarget->hasXMM());
+ Subtarget->hasSSE1());
}
// Insert VAARG_64 node into the DAG
@@ -9106,7 +9134,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
Chain,
VAARG,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -9125,6 +9153,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes the immediate version of the shift as input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue SrcOp, SDValue ShAmt,
+ SelectionDAG &DAG) {
+ assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+ if (isa<ConstantSDNode>(ShAmt)) {
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+ }
+ }
+
+ // Change opcode to non-immediate version
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+ case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+ }
+
+  // Need to build a vector containing the shift amount.
+  // The shift amount is 32 bits, but the SSE instructions read 64 bits, so zero the upper half.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
+
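When the amount is not a constant, the helper above switches to the register form of the shift and widens the i32 amount into a v4i32 whose upper lanes are zero or undef, since those instructions take their count from the low 64 bits of an XMM register. A trivial scalar model of the operand it builds (purely illustrative):

    #include <cstdint>
    static void build_shift_count(uint32_t amt, uint32_t out[4]) {
      out[0] = amt;   // ShAmt
      out[1] = 0;     // upper half of the 64-bit count must be zero
      out[2] = 0;     // undef in the DAG; zeroed here for the scalar model
      out[3] = 0;
    }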
SDValue
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -9159,7 +9224,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
unsigned Opc = 0;
ISD::CondCode CC = ISD::SETCC_INVALID;
switch (IntNo) {
- default: break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse2_comieq_sd:
Opc = X86ISD::COMI;
@@ -9231,7 +9296,201 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ // XOP comparison intrinsics
+ case Intrinsic::x86_xop_vpcomltb:
+ case Intrinsic::x86_xop_vpcomltw:
+ case Intrinsic::x86_xop_vpcomltd:
+ case Intrinsic::x86_xop_vpcomltq:
+ case Intrinsic::x86_xop_vpcomltub:
+ case Intrinsic::x86_xop_vpcomltuw:
+ case Intrinsic::x86_xop_vpcomltud:
+ case Intrinsic::x86_xop_vpcomltuq:
+ case Intrinsic::x86_xop_vpcomleb:
+ case Intrinsic::x86_xop_vpcomlew:
+ case Intrinsic::x86_xop_vpcomled:
+ case Intrinsic::x86_xop_vpcomleq:
+ case Intrinsic::x86_xop_vpcomleub:
+ case Intrinsic::x86_xop_vpcomleuw:
+ case Intrinsic::x86_xop_vpcomleud:
+ case Intrinsic::x86_xop_vpcomleuq:
+ case Intrinsic::x86_xop_vpcomgtb:
+ case Intrinsic::x86_xop_vpcomgtw:
+ case Intrinsic::x86_xop_vpcomgtd:
+ case Intrinsic::x86_xop_vpcomgtq:
+ case Intrinsic::x86_xop_vpcomgtub:
+ case Intrinsic::x86_xop_vpcomgtuw:
+ case Intrinsic::x86_xop_vpcomgtud:
+ case Intrinsic::x86_xop_vpcomgtuq:
+ case Intrinsic::x86_xop_vpcomgeb:
+ case Intrinsic::x86_xop_vpcomgew:
+ case Intrinsic::x86_xop_vpcomged:
+ case Intrinsic::x86_xop_vpcomgeq:
+ case Intrinsic::x86_xop_vpcomgeub:
+ case Intrinsic::x86_xop_vpcomgeuw:
+ case Intrinsic::x86_xop_vpcomgeud:
+ case Intrinsic::x86_xop_vpcomgeuq:
+ case Intrinsic::x86_xop_vpcomeqb:
+ case Intrinsic::x86_xop_vpcomeqw:
+ case Intrinsic::x86_xop_vpcomeqd:
+ case Intrinsic::x86_xop_vpcomeqq:
+ case Intrinsic::x86_xop_vpcomequb:
+ case Intrinsic::x86_xop_vpcomequw:
+ case Intrinsic::x86_xop_vpcomequd:
+ case Intrinsic::x86_xop_vpcomequq:
+ case Intrinsic::x86_xop_vpcomneb:
+ case Intrinsic::x86_xop_vpcomnew:
+ case Intrinsic::x86_xop_vpcomned:
+ case Intrinsic::x86_xop_vpcomneq:
+ case Intrinsic::x86_xop_vpcomneub:
+ case Intrinsic::x86_xop_vpcomneuw:
+ case Intrinsic::x86_xop_vpcomneud:
+ case Intrinsic::x86_xop_vpcomneuq:
+ case Intrinsic::x86_xop_vpcomfalseb:
+ case Intrinsic::x86_xop_vpcomfalsew:
+ case Intrinsic::x86_xop_vpcomfalsed:
+ case Intrinsic::x86_xop_vpcomfalseq:
+ case Intrinsic::x86_xop_vpcomfalseub:
+ case Intrinsic::x86_xop_vpcomfalseuw:
+ case Intrinsic::x86_xop_vpcomfalseud:
+ case Intrinsic::x86_xop_vpcomfalseuq:
+ case Intrinsic::x86_xop_vpcomtrueb:
+ case Intrinsic::x86_xop_vpcomtruew:
+ case Intrinsic::x86_xop_vpcomtrued:
+ case Intrinsic::x86_xop_vpcomtrueq:
+ case Intrinsic::x86_xop_vpcomtrueub:
+ case Intrinsic::x86_xop_vpcomtrueuw:
+ case Intrinsic::x86_xop_vpcomtrueud:
+ case Intrinsic::x86_xop_vpcomtrueuq: {
+ unsigned CC = 0;
+ unsigned Opc = 0;
+
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_xop_vpcomltb:
+ case Intrinsic::x86_xop_vpcomltw:
+ case Intrinsic::x86_xop_vpcomltd:
+ case Intrinsic::x86_xop_vpcomltq:
+ CC = 0;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomltub:
+ case Intrinsic::x86_xop_vpcomltuw:
+ case Intrinsic::x86_xop_vpcomltud:
+ case Intrinsic::x86_xop_vpcomltuq:
+ CC = 0;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomleb:
+ case Intrinsic::x86_xop_vpcomlew:
+ case Intrinsic::x86_xop_vpcomled:
+ case Intrinsic::x86_xop_vpcomleq:
+ CC = 1;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomleub:
+ case Intrinsic::x86_xop_vpcomleuw:
+ case Intrinsic::x86_xop_vpcomleud:
+ case Intrinsic::x86_xop_vpcomleuq:
+ CC = 1;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomgtb:
+ case Intrinsic::x86_xop_vpcomgtw:
+ case Intrinsic::x86_xop_vpcomgtd:
+ case Intrinsic::x86_xop_vpcomgtq:
+ CC = 2;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomgtub:
+ case Intrinsic::x86_xop_vpcomgtuw:
+ case Intrinsic::x86_xop_vpcomgtud:
+ case Intrinsic::x86_xop_vpcomgtuq:
+ CC = 2;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomgeb:
+ case Intrinsic::x86_xop_vpcomgew:
+ case Intrinsic::x86_xop_vpcomged:
+ case Intrinsic::x86_xop_vpcomgeq:
+ CC = 3;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomgeub:
+ case Intrinsic::x86_xop_vpcomgeuw:
+ case Intrinsic::x86_xop_vpcomgeud:
+ case Intrinsic::x86_xop_vpcomgeuq:
+ CC = 3;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomeqb:
+ case Intrinsic::x86_xop_vpcomeqw:
+ case Intrinsic::x86_xop_vpcomeqd:
+ case Intrinsic::x86_xop_vpcomeqq:
+ CC = 4;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomequb:
+ case Intrinsic::x86_xop_vpcomequw:
+ case Intrinsic::x86_xop_vpcomequd:
+ case Intrinsic::x86_xop_vpcomequq:
+ CC = 4;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomneb:
+ case Intrinsic::x86_xop_vpcomnew:
+ case Intrinsic::x86_xop_vpcomned:
+ case Intrinsic::x86_xop_vpcomneq:
+ CC = 5;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomneub:
+ case Intrinsic::x86_xop_vpcomneuw:
+ case Intrinsic::x86_xop_vpcomneud:
+ case Intrinsic::x86_xop_vpcomneuq:
+ CC = 5;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomfalseb:
+ case Intrinsic::x86_xop_vpcomfalsew:
+ case Intrinsic::x86_xop_vpcomfalsed:
+ case Intrinsic::x86_xop_vpcomfalseq:
+ CC = 6;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomfalseub:
+ case Intrinsic::x86_xop_vpcomfalseuw:
+ case Intrinsic::x86_xop_vpcomfalseud:
+ case Intrinsic::x86_xop_vpcomfalseuq:
+ CC = 6;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomtrueb:
+ case Intrinsic::x86_xop_vpcomtruew:
+ case Intrinsic::x86_xop_vpcomtrued:
+ case Intrinsic::x86_xop_vpcomtrueq:
+ CC = 7;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomtrueub:
+ case Intrinsic::x86_xop_vpcomtrueuw:
+ case Intrinsic::x86_xop_vpcomtrueud:
+ case Intrinsic::x86_xop_vpcomtrueuq:
+ CC = 7;
+ Opc = X86ISD::VPCOMU;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ return DAG.getNode(Opc, dl, Op.getValueType(), LHS, RHS,
+ DAG.getConstant(CC, MVT::i8));
+ }
+
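The CC immediates selected in this switch follow the VPCOM/VPCOMU encoding, where 0 through 7 mean LT, LE, GT, GE, EQ, NE, always-false and always-true, with the U variants being the unsigned comparisons. A small enum restating that mapping (the names are illustrative, not LLVM's):

    enum XopComparisonImm {   // third operand of the X86ISD::VPCOM / VPCOMU nodes above
      XOP_COM_LT    = 0,
      XOP_COM_LE    = 1,
      XOP_COM_GT    = 2,
      XOP_COM_GE    = 3,
      XOP_COM_EQ    = 4,
      XOP_COM_NE    = 5,
      XOP_COM_FALSE = 6,      // always-false predicate
      XOP_COM_TRUE  = 7       // always-true predicate
    };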
// Arithmetic intrinsics.
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
@@ -9244,6 +9503,62 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_hsub_pd_256:
return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_psign_b_128:
+ case Intrinsic::x86_ssse3_psign_w_128:
+ case Intrinsic::x86_ssse3_psign_d_128:
+ case Intrinsic::x86_avx2_psign_b:
+ case Intrinsic::x86_avx2_psign_w:
+ case Intrinsic::x86_avx2_psign_d:
+ return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse41_insertps:
+ return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::x86_avx_vpermil_ps:
+ case Intrinsic::x86_avx_vpermil_pd:
+ case Intrinsic::x86_avx_vpermil_ps_256:
+ case Intrinsic::x86_avx_vpermil_pd_256:
+ return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -9310,16 +9625,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
+ // SSE/AVX shift intrinsics
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -9333,79 +9685,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return SDValue();
unsigned NewIntNo = 0;
- EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
- case Intrinsic::x86_sse2_pslli_w:
- NewIntNo = Intrinsic::x86_sse2_psll_w;
- break;
- case Intrinsic::x86_sse2_pslli_d:
- NewIntNo = Intrinsic::x86_sse2_psll_d;
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
- case Intrinsic::x86_sse2_pslli_q:
- NewIntNo = Intrinsic::x86_sse2_psll_q;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
break;
- case Intrinsic::x86_sse2_psrli_w:
- NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
break;
- case Intrinsic::x86_sse2_psrli_d:
- NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
break;
- case Intrinsic::x86_sse2_psrli_q:
- NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
break;
- case Intrinsic::x86_sse2_psrai_w:
- NewIntNo = Intrinsic::x86_sse2_psra_w;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
break;
- case Intrinsic::x86_sse2_psrai_d:
- NewIntNo = Intrinsic::x86_sse2_psra_d;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
break;
- default: {
- ShAmtVT = MVT::v2i32;
- switch (IntNo) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- }
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars use 32-bit shift amounts, but
// the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
// to be zero.
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- if (ShAmtVT == MVT::v4i32) {
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
- } else {
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
+ DAG.getConstant(0, MVT::i32));
// FIXME this must be lowered to get rid of the invalid type.
- }
EVT VT = Op.getValueType();
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
@@ -9432,13 +9745,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -9453,7 +9766,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -9685,7 +9998,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -9745,7 +10058,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op,
+ SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -9753,26 +10067,41 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
Op = Op.getOperand(0);
if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
- // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ // Issue a bsr (scan bits in reverse).
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // And xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = Op.getOperand(0);
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
- DAG.getConstant(NumBits, OpVT),
+ DAG.getConstant(NumBits, VT),
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
- Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
-
- if (VT == MVT::i8)
- Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
- return Op;
+ return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
}
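The CTLZ_ZERO_UNDEF path relies on the identity ctlz(x) = (NumBits-1) - bsr(x) for nonzero x; because bsr(x) never exceeds NumBits-1 and NumBits is a power of two, the subtraction can be done with an XOR against NumBits-1. CTTZ keeps the BSF plus CMOV sequence since BSF leaves its destination undefined when the source is zero. A scalar check of the CTLZ identity, assuming a nonzero input:

    #include <cstdint>
    static unsigned ctlz32_nonzero(uint32_t x) {
      unsigned bsr = 31;
      while (!(x & (1u << bsr))) --bsr;   // index of the highest set bit, like BSR
      return bsr ^ 31u;                   // == 31 - bsr, i.e. the leading-zero count
    }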
// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
@@ -9824,49 +10153,49 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256)
+ if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
- assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ "Only know how to lower V2I64/V4I64 multiply");
+
DebugLoc dl = Op.getDebugLoc();
- // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
- // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
- // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
- // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
- // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ // Ahi = psrlqi(a, 32);
+ // Bhi = psrlqi(b, 32);
//
- // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
- // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // AloBlo = pmuludq(a, b);
+ // AloBhi = pmuludq(a, Bhi);
+ // AhiBlo = pmuludq(Ahi, b);
+
+ // AloBhi = psllqi(AloBhi, 32);
+ // AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
- SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- A, DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- B, DAG.getConstant(32, MVT::i32));
- SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, B);
- SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, Bhi);
- SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- Ahi, B);
- AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AloBhi, DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue ShAmt = DAG.getConstant(32, MVT::i32);
+
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+
+ // Bit cast to 32-bit vectors for MULUDQ
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
+ B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
+ Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
+ Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
+
+ SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
+ SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
- return Res;
+ return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
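The PMULUDQ sequence above is the schoolbook 64-bit multiply with the high*high partial product dropped, since that product cannot reach the low 64 bits of the result: per lane, a*b mod 2^64 = alo*blo + ((alo*bhi + ahi*blo) << 32). A scalar reference for one lane (illustrative only):

    #include <cstdint>
    static uint64_t mul64_like_lowering(uint64_t a, uint64_t b) {
      uint64_t Ahi = a >> 32, Bhi = b >> 32;                   // psrlqi(a, 32), psrlqi(b, 32)
      uint64_t AloBlo = (uint32_t)a * (uint64_t)(uint32_t)b;   // pmuludq(a, b)
      uint64_t AloBhi = ((uint32_t)a * Bhi) << 32;             // pmuludq(a, Bhi), then psllqi 32
      uint64_t AhiBlo = (Ahi * (uint32_t)b) << 32;             // pmuludq(Ahi, b), then psllqi 32
      return AloBlo + AloBhi + AhiBlo;                         // == a * b (mod 2^64)
    }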
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
@@ -9877,12 +10206,183 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
SDValue Amt = Op.getOperand(1);
LLVMContext *Context = DAG.getContext();
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
return SDValue();
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasAVX2() &&
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ if (Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+
+ if (VT == MVT::v16i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ }
+
+ if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ }
+ }
+ }
+
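Two identities carry the byte-element cases above: a signed shift right by 7 is just a comparison against zero (hence PCMPGT with a zero vector), and for other amounts x s>> a == ((x u>> a) ^ m) - m where m = 128 >> a marks the sign bit's new position. A scalar check of the second identity (illustrative):

    #include <cstdint>
    static int8_t sra8_via_srl(int8_t x, unsigned a) {   // assumes 0 <= a < 8
      uint8_t m = (uint8_t)(0x80u >> a);                 // the "128 >> ShiftAmt" splat above
      uint8_t u = (uint8_t)((uint8_t)x >> a);            // logical shift of the bit pattern
      return (int8_t)(uint8_t)((u ^ m) - m);             // xor/sub recreates the sign extension
    }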
+ // Lower SHL with variable shift amount.
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
+ Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
+ DAG.getConstant(23, MVT::i32));
+
+ const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
+ Constant *C = ConstantDataVector::get(*Context, CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
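The v4i32 variable shift builds 2^s per lane by sliding the shift amount into the exponent field of an IEEE single (shift left by 23, add the bias pattern 0x3f800000), converting back to integer, and multiplying. The per-lane arithmetic, as a hedged scalar sketch:

    #include <cstdint>
    #include <cstring>
    static uint32_t shl_via_fp_exponent(uint32_t x, uint32_t s) {   // s < 32
      uint32_t bits = (s << 23) + 0x3f800000u;   // exponent = 127 + s, mantissa = 0
      float pow2;
      std::memcpy(&pow2, &bits, sizeof pow2);    // pow2 == 2^s exactly
      return x * (uint32_t)pow2;                 // x << s, modulo 2^32
    }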
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
+ assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
+
+ // a = a << 5;
+ Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
+ DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
+
+ // Turn 'a' into a mask suitable for VSELECT
+ SDValue VSelM = DAG.getConstant(0x80, VT);
+ SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ SDValue CM1 = DAG.getConstant(0x0f, VT);
+ SDValue CM2 = DAG.getConstant(0x3f, VT);
+
+ // r = VSELECT(r, psllw(r & (char16)15, 4), a);
+ SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(4, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // r = VSELECT(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(2, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // return VSELECT(r, r+r, a);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
+ return R;
+ }
+
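The v16i8 variable SHL is a three-round doubling scheme: the amounts are pre-shifted left by 5 so the bit that decides each round sits in the byte's MSB, then the value is conditionally shifted by 4, by 2, and finally by 1 (as r+r), with VSELECT making the per-byte choice and the amount byte doubled between rounds. One byte of it as a scalar model (illustrative):

    #include <cstdint>
    static uint8_t shl8_by_rounds(uint8_t r, uint8_t a) {   // assumes a < 8
      uint8_t amt = (uint8_t)(a << 5);                      // "a = a << 5"
      if (amt & 0x80) r = (uint8_t)((r & 0x0f) << 4);       // shift by 4 if bit 2 of a is set
      amt = (uint8_t)(amt + amt);                           // "a += a"
      if (amt & 0x80) r = (uint8_t)((r & 0x3f) << 2);       // shift by 2 if bit 1 of a is set
      amt = (uint8_t)(amt + amt);
      if (amt & 0x80) r = (uint8_t)(r + r);                 // shift by 1 if bit 0 of a is set
      return r;
    }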
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.getSizeInBits() == 256) {
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -9897,9 +10397,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
- for (int i = 0; i < NumElems/2; ++i)
+ for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
- for (int i = NumElems/2; i < NumElems; ++i)
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
@@ -9921,120 +10421,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
}
- // Optimize shl/srl/sra with constant shift amount.
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- uint64_t ShiftAmt = C->getZExtValue();
-
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
-
- // Lower SHL with variable shift amount.
- if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- Op.getOperand(1), DAG.getConstant(23, MVT::i32));
-
- ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
- std::vector<Constant*> CV(4, CI);
- Constant *C = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
- Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
- return DAG.getNode(ISD::MUL, dl, VT, Op, R);
- }
- if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
- // a = a << 5;
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- Op.getOperand(1), DAG.getConstant(5, MVT::i32));
-
- ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
- ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
-
- std::vector<Constant*> CVM1(16, CM1);
- std::vector<Constant*> CVM2(16, CM2);
- Constant *C = ConstantVector::get(CVM1);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- // r = pblendv(r, psllw(r & (char16)15, 4), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
- C = ConstantVector::get(CVM2);
- CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- // r = pblendv(r, psllw(r & (char16)63, 2), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
- // return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R));
- return R;
- }
return SDValue();
}
@@ -10113,46 +10499,58 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- SDNode* Node = Op.getNode();
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- EVT VT = Node->getValueType(0);
- if (Subtarget->hasXMMInt() && VT.isVector()) {
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
-
- unsigned SHLIntrinsicsID = 0;
- unsigned SRAIntrinsicsID = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return SDValue();
- case MVT::v4i32: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
- break;
- }
- case MVT::v8i16: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
- break;
- }
- }
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
- SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SHLIntrinsicsID, MVT::i32),
- Node->getOperand(0), ShAmt);
+ if (!Subtarget->hasSSE2() || !VT.isVector())
+ return SDValue();
- // In case of 1 bit sext, no need to shr
- if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SRAIntrinsicsID, MVT::i32),
- Tmp1, ShAmt);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (!Subtarget->hasAVX2()) {
+ // needs to be split
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
+ }
+ // fall through
+ case MVT::v4i32:
+ case MVT::v8i16: {
+ SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
+ Op.getOperand(0), ShAmt, DAG);
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ }
}
-
- return SDValue();
}
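As before, sign_extend_inreg on these vector types is modeled as a shift left by the width difference followed by an arithmetic shift right by the same amount, which replicates the new sign bit across the widened part of each element. The scalar form of the identity (arithmetic right shift assumed):

    #include <cstdint>
    static int32_t sext_inreg_i16_in_i32(int32_t v) {
      // BitsDiff = 32 - 16: shift the low 16 bits up, then shift back arithmetically.
      return (int32_t)((uint32_t)v << 16) >> 16;
    }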
@@ -10161,7 +10559,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
// Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
// There isn't any reason to disable it if the target processor supports it.
- if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) {
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
@@ -10215,7 +10613,7 @@ SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
// no-sse2). There isn't any reason to disable it if the target processor
// supports it.
- if (Subtarget->hasXMMInt() || Subtarget->is64Bit())
+ if (Subtarget->hasSSE2() || Subtarget->is64Bit())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
@@ -10246,8 +10644,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
@@ -10295,7 +10692,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() &&
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
@@ -10365,7 +10762,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = X86ISD::ADD; break;
case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
case ISD::SUBC: Opc = X86ISD::SUB; break;
@@ -10432,6 +10829,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SRA:
@@ -10506,8 +10904,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
- return;
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
@@ -10515,15 +10912,25 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SUBE:
// We don't want to expand or promote these.
return;
- case ISD::FP_TO_SINT: {
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
+ return;
+
std::pair<SDValue,SDValue> Vals =
- FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
- MachinePointerInfo(), false, false, 0));
+ if (StackSlot.getNode() != 0)
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ else
+ Results.push_back(FIST);
}
return;
}
@@ -10657,15 +11064,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
- case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
- case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
- case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PSIGN: return "X86ISD::PSIGN";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
+ case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
+ case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
- case X86ISD::FHADD: return "X86ISD::FHADD";
- case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
@@ -10681,18 +11092,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
+ case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
- case X86ISD::CMPPD: return "X86ISD::CMPPD";
- case X86ISD::CMPPS: return "X86ISD::CMPPS";
- case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
- case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
- case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
- case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
- case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
- case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
- case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
- case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::VSRA: return "X86ISD::VSRA";
+ case X86ISD::VSHLI: return "X86ISD::VSHLI";
+ case X86ISD::VSRLI: return "X86ISD::VSRLI";
+ case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::CMPP: return "X86ISD::CMPP";
+ case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
+ case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
@@ -10705,54 +11115,39 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::ANDN: return "X86ISD::ANDN";
+ case X86ISD::BLSI: return "X86ISD::BLSI";
+ case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
+ case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::PALIGN: return "X86ISD::PALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
- case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
- case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
- case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
- case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::SHUFP: return "X86ISD::SHUFP";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
- case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
- case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
- case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
- case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
- case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
- case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
- case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
+ case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
}
}
@@ -10855,21 +11250,21 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
+ return false;
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT) ||
+ isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
- isUNPCKLMask(M, VT) ||
- isUNPCKHMask(M, VT) ||
- isUNPCKL_v_undef_Mask(M, VT) ||
- isUNPCKH_v_undef_Mask(M, VT));
+ isPALIGNRMask(M, VT, Subtarget) ||
+ isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
}
bool
@@ -10882,8 +11277,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
if (NumElts == 4 && VT.getSizeInBits() == 128) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
}
return false;
}
@@ -10902,7 +11297,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
unsigned CXchgOpc,
unsigned notOpc,
unsigned EAXreg,
- TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
bool invSrc) const {
// For the atomic bitwise operator, we generate
// thisMBB:
@@ -11274,7 +11669,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
@@ -11679,6 +12074,42 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
return EndMBB;
}
+// The EFLAGS operand of SelectItr might be missing a kill marker
+// because there were multiple uses of EFLAGS, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of EFLAGS.
+ MachineBasicBlock::iterator miI(llvm::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(X86::EFLAGS))
+ return false;
+ if (mi.definesRegister(X86::EFLAGS))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether EFLAGS is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and EFLAGS wasn't live
+ // out. SelectMI should have a kill flag on EFLAGS.
+ SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
+ return true;
+}
+
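For context only, not part of the patch: the added checkAndUpdateEFLAGSKill helper decides whether the select pseudo should carry an EFLAGS kill flag by scanning forward for a use or def and then checking successor live-ins. A minimal standalone sketch of that liveness scan, assuming a simplified instruction record (the names Inst, Block and flagsKilledAt are invented here and are not the LLVM MachineInstr API):

#include <cstddef>
#include <cstdio>
#include <vector>

struct Inst { bool readsFlags; bool definesFlags; };
struct Block {
  std::vector<Inst> insts;
  std::vector<const Block*> succs;
  bool flagsLiveIn;
};

// Returns true if the instruction at 'pos' may treat FLAGS as killed:
// no later instruction in the block reads FLAGS before a redefinition,
// and FLAGS is not live into any successor when we fall off the end.
static bool flagsKilledAt(const Block &b, std::size_t pos) {
  for (std::size_t i = pos + 1; i < b.insts.size(); ++i) {
    if (b.insts[i].readsFlags)
      return false;            // a later reader, so not killed here
    if (b.insts[i].definesFlags)
      return true;             // redefined before any read, so killed
  }
  for (const Block *s : b.succs)
    if (s->flagsLiveIn)
      return false;            // live into a successor, so not killed
  return true;                 // fell off the end and nobody needs FLAGS
}

int main() {
  Block succ{{}, {}, /*flagsLiveIn=*/false};
  Block b{{{false, true}, {false, false}, {false, true}}, {&succ}, false};
  std::printf("%d\n", flagsKilledAt(b, 0)); // prints 1: FLAGS redefined at index 2
}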
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -11708,7 +12139,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- if (!MI->killsRegister(X86::EFLAGS)) {
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ if (!MI->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -11753,7 +12186,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(EnableSegmentedStacks);
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -11785,6 +12218,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = Is64Bit ? X86::RSP : X86::ESP;
@@ -11802,33 +12236,39 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg)
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg)
- .addReg(tmpSPVReg);
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
- .addReg(tmpSPVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
- .addReg(tmpSPVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI);
+ .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI)
+ .addRegMask(RegMask)
+ .addReg(X86::RAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space");
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
@@ -11926,6 +12366,11 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
+ // Get a register mask for the lowered call.
+ // FIXME: The 32-bit calls have non-standard calling conventions. Use a
+ // proper register mask.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -11936,6 +12381,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
+ MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
@@ -11946,6 +12392,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
@@ -11956,6 +12403,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
@@ -11966,30 +12414,14 @@ MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: assert(0 && "Unexpected instr type to insert");
+ default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- assert(0 && "TAILJMP64 would not be touched here.");
+ llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
- // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
- // On AMD64, additional defs should be added before register allocation.
- if (!Subtarget->isTargetWin64()) {
- MI->addRegisterDefined(X86::RSI);
- MI->addRegisterDefined(X86::RDI);
- MI->addRegisterDefined(X86::XMM6);
- MI->addRegisterDefined(X86::XMM7);
- MI->addRegisterDefined(X86::XMM8);
- MI->addRegisterDefined(X86::XMM9);
- MI->addRegisterDefined(X86::XMM10);
- MI->addRegisterDefined(X86::XMM11);
- MI->addRegisterDefined(X86::XMM12);
- MI->addRegisterDefined(X86::XMM13);
- MI->addRegisterDefined(X86::XMM14);
- MI->addRegisterDefined(X86::XMM15);
- }
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
@@ -12294,11 +12726,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
+ unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
@@ -12307,7 +12739,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
switch (Opc) {
default: break;
case X86ISD::ADD:
@@ -12326,8 +12758,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
break;
// Fallthrough
case X86ISD::SETCC:
- KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - 1);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -12339,18 +12770,20 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_mmx_pmovmskb:
- case Intrinsic::x86_sse2_pmovmskb_128: {
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx2_pmovmskb: {
// High bits of movmskp{s|d}, pmovmskb are known zero.
switch (IntId) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
}
- KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - NumLoBits);
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
}
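Illustrative only, not part of the patch: the switch above records how many low bits each MOVMSK-style intrinsic can produce (4 for movmskps, 16 for pmovmskb.128, 32 for the new AVX2 pmovmskb), so every bit above that is known zero. A scalar model of the 4-lane movmskps case, assuming plain C++ rather than the real intrinsic:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar model of MOVMSKPS: collect the sign bit of each of 4 floats into
// the low 4 bits of a 32-bit integer; bits 4..31 are always zero.
static uint32_t movmskps(const float (&v)[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t bits;
    std::memcpy(&bits, &v[i], sizeof bits);
    mask |= (bits >> 31) << i;
  }
  return mask;
}

int main() {
  float v[4] = {-1.0f, 2.0f, -0.0f, 4.0f};
  uint32_t m = movmskps(v);
  std::printf("mask=%u, high 28 bits zero: %d\n", m, (m >> 4) == 0); // mask=5, 1
}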
@@ -12418,7 +12851,8 @@ static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget* Subtarget) {
DebugLoc dl = N->getDebugLoc();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
@@ -12454,9 +12888,23 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
return SDValue();
+ // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
+ SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
+ SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+ Ld->getMemoryVT(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+ return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ }
+
// Emit a zeroed vector and insert the desired subvector on its
// first half.
- SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
DAG.getConstant(0, MVT::i32), DAG, dl);
return DCI.CombineTo(N, InsV);
@@ -12501,7 +12949,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
- return PerformShuffleCombine256(N, DAG, DCI);
+ return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
if (VT.getSizeInBits() != 128)
@@ -12517,11 +12965,185 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
+
+/// PerformTruncateCombine - Converts a truncate operation into
+/// a sequence of vector shuffle operations.
+/// This is possible when we truncate a 256-bit vector to a 128-bit vector.
+
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+ DAGCombinerInfo &DCI) const {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX()) return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // PSHUFD
+ int ShufMask1[] = {0, 2, 0, 0};
+
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT),
+ ShufMask1);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
+ }
+ if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
+
+ // PSHUFB
+ int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
+ }
+
+ return SDValue();
+}
+
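For illustration, not part of the patch: PerformTruncateCombine lowers a v4i64 to v4i32 truncate by splitting the source into two v2i64 halves, bitcasting each to v4i32, keeping the even (low) 32-bit words with a {0, 2, ...} shuffle, and gluing the halves back together. A scalar sketch of the same element bookkeeping, assuming plain arrays, little-endian x86 lane layout, and an invented helper name:

#include <array>
#include <cstdint>
#include <cstdio>

// Truncate four 64-bit lanes to four 32-bit lanes the way the combine does:
// view each 128-bit half as four 32-bit words, keep words 0 and 2 (the low
// halves of the two 64-bit lanes), then concatenate the two results.
static std::array<uint32_t, 4> truncate_v4i64(const std::array<uint64_t, 4> &src) {
  std::array<uint32_t, 4> out{};
  for (int half = 0; half < 2; ++half) {          // the two v2i64 subvectors
    uint32_t words[4];
    for (int i = 0; i < 2; ++i) {                 // bitcast v2i64 -> v4i32
      words[2 * i]     = static_cast<uint32_t>(src[half * 2 + i]);
      words[2 * i + 1] = static_cast<uint32_t>(src[half * 2 + i] >> 32);
    }
    out[half * 2]     = words[0];                 // PSHUFD-style mask {0, 2, ...}
    out[half * 2 + 1] = words[2];
  }
  return out;
}

int main() {
  std::array<uint64_t, 4> v = {0x1111111100000001ULL, 0x2222222200000002ULL,
                               0x3333333300000003ULL, 0x4444444400000004ULL};
  for (uint32_t w : truncate_v4i64(v)) std::printf("%u ", w); // 1 2 3 4
  std::printf("\n");
}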
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been custom lowered so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue InVec = N->getOperand(0);
+ SDValue EltNo = N->getOperand(1);
+
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+
+ EVT VT = InVec.getValueType();
+
+ bool HasShuffleIntoBitcast = false;
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ return SDValue();
+ InVec = InVec.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ if (!isTargetShuffle(InVec.getOpcode()))
+ return SDValue();
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ SmallVector<int, 16> ShuffleMask;
+ bool UnaryShuffle;
+ if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+ return SDValue();
+
+ // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+ : InVec.getOperand(1);
+
+ // If inputs to shuffle are the same for both ops, then allow 2 uses
+ unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+ if (LdNode.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+ return SDValue();
+
+ AllowedUses = 1; // only allow 1 load use if we have a bitcast
+ LdNode = LdNode.getOperand(0);
+ }
+
+ if (!ISD::isNormalLoad(LdNode.getNode()))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+ if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ return SDValue();
+
+ if (HasShuffleIntoBitcast) {
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment are valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return SDValue();
+ }
+
+ // All checks match so transform back to vector_shuffle so that DAG combiner
+ // can finish the job
+ DebugLoc dl = N->getDebugLoc();
+
+ // Create a shuffle node, taking into account the case that it's a unary shuffle
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ InVec.getOperand(0), Shuffle,
+ &ShuffleMask[0]);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+ EltNo);
+}
+
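For context only, not part of the patch: XFormVExtractWithShuffleIntoLoad turns extract_elt(target_shuffle(load X), C) back into a generic VECTOR_SHUFFLE so the DAG combiner can fold it into a single scalar element load. The heart of it is the index arithmetic: look the constant lane up in the shuffle mask and pick the corresponding source element. A standalone sketch with ordinary arrays and an invented helper name:

#include <cstdio>
#include <vector>

// Model of extract_elt(shuffle(A, B, Mask), Elt): the answer is a single
// element of A or B, selected by Mask[Elt].
static int extractOfShuffle(const std::vector<int> &A, const std::vector<int> &B,
                            const std::vector<int> &Mask, unsigned Elt) {
  int Idx = Mask[Elt];                         // -1 would mean "undef"
  unsigned NumElems = A.size();
  if (Idx < 0) return 0;                       // undef lane, any value is fine
  return Idx < (int)NumElems ? A[Idx]          // lane comes from the first input
                             : B[Idx - NumElems]; // or from the second input
}

int main() {
  std::vector<int> A = {10, 11, 12, 13}, B = {20, 21, 22, 23};
  std::vector<int> Mask = {3, 7, 0, 4};        // picks A[3], B[3], A[0], B[0]
  std::printf("%d\n", extractOfShuffle(A, B, Mask, 1)); // 23, i.e. B[3]
}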
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+ if (NewOp.getNode())
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
@@ -12582,6 +13204,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@@ -12590,7 +13213,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
ScalarAddr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -12603,7 +13226,10 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+
+
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -12617,7 +13243,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
- (Subtarget->hasXMMInt() ||
+ (Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -12632,7 +13258,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -12642,7 +13268,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -12660,7 +13286,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -12670,7 +13296,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -12696,7 +13322,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -12706,7 +13332,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
@@ -12731,7 +13357,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -12848,6 +13474,57 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Canonicalize max and min:
+ // (x > y) ? x : y -> (x >= y) ? x : y
+ // (x < y) ? x : y -> (x <= y) ? x : y
+ // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
+ // the need for an extra compare
+ // against zero. e.g.
+ // (x - y) > 0 ? (x - y) : 0 -> (x - y) >= 0 ? (x - y) : 0
+ // subl %esi, %edi
+ // testl %edi, %edi
+ // movl $0, %eax
+ // cmovgl %edi, %eax
+ // =>
+ // xorl %eax, %eax
+ // subl %esi, %edi
+ // cmovsl %eax, %edi
+ if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
+ DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGT: {
+ ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
+ Cond = DAG.getSetCC(Cond.getDebugLoc(), Cond.getValueType(),
+ Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
+ }
+ }
+ }
+
+ // If we know that this node is legal then we know that it is going to be
+ // matched by one of the SSE/AVX BLEND instructions. These instructions only
+ // depend on the highest bit in each word. Try to use SimplifyDemandedBits
+ // to simplify previous instructions.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
+ !DCI.isBeforeLegalize() &&
+ TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
return SDValue();
}
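Illustrative only, not part of the patch: the new canonicalization in the hunk above rewrites (x > y) ? x : y as (x >= y) ? x : y (and SETLT as SETLE), which is the same select but lets the backend reuse the sign flag of the subtraction instead of a separate compare against zero. A quick check of the identity, assuming plain ints:

#include <cassert>

// (x > y) ? x : y and (x >= y) ? x : y differ only when x == y, and in that
// case both sides return the same value, so the two selects always agree.
static int maxGT(int x, int y) { return x >  y ? x : y; }
static int maxGE(int x, int y) { return x >= y ? x : y; }

int main() {
  for (int x = -3; x <= 3; ++x)
    for (int y = -3; y <= 3; ++y)
      assert(maxGT(x, y) == maxGE(x, y));
  return 0;
}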
@@ -13042,7 +13719,8 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
- if (N1C && N0.getOpcode() == ISD::AND &&
+ if (VT.isInteger() && !VT.isVector() &&
+ N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
@@ -13058,26 +13736,46 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+
+ // Hardware support for vector shifts is sparse, which makes us scalarize the
+ // vector operations in many cases. Also, on Sandy Bridge ADD is faster than
+ // shl.
+ // (shl V, 1) -> add V,V
+ if (isSplatVector(N1.getNode())) {
+ assert(N0.getValueType().isVector() && "Invalid vector shift type");
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
+ // We shift all of the values by one. In many cases we do not have
+ // hardware support for this operation. This is better expressed as an ADD
+ // of two values.
+ if (N1C && (1 == N1C->getZExtValue())) {
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N0);
+ }
+ }
+
return SDValue();
}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
- if (!VT.isVector() && VT.isInteger() &&
- N->getOpcode() == ISD::SHL)
- return PerformSHLCombine(N, DAG);
+ if (N->getOpcode() == ISD::SHL) {
+ SDValue V = PerformSHLCombine(N, DAG);
+ if (V.getNode()) return V;
+ }
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because a constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
return SDValue();
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+ (!Subtarget->hasAVX2() ||
+ (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
SDValue ShAmtOp = N->getOperand(1);
@@ -13093,6 +13791,11 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
BaseShAmt = Arg;
break;
}
+ // Handle the case where the build_vector is all undef
+ // FIXME: Should DAG allow this?
+ if (i == NumElts)
+ return SDValue();
+
for (; i != NumElts; ++i) {
SDValue Arg = ShAmtOp.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
@@ -13119,9 +13822,16 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
BaseShAmt = InVec.getOperand(1);
}
}
- if (BaseShAmt.getNode() == 0)
+ if (BaseShAmt.getNode() == 0) {
+ // Don't create instructions with illegal types after legalize
+ // types has run.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
+ !DCI.isBeforeLegalize())
+ return SDValue();
+
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
DAG.getIntPtrConstant(0));
+ }
} else
return SDValue();
@@ -13136,47 +13846,38 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
switch (N->getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
- break;
case ISD::SHL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRA:
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
}
- return SDValue();
}
@@ -13190,7 +13891,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
- if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
@@ -13300,7 +14001,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
- // Create ANDN instructions
+ // Create ANDN, BLSI, and BLSR instructions
+ // BLSI is X & (-X)
+ // BLSR is X & (X-1)
if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -13313,6 +14016,26 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+
return SDValue();
}
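For illustration, not part of the patch: the comment in the hunk above spells out the BMI patterns this combine recognizes, BLSI is X & (-X) (isolate the lowest set bit) and BLSR is X & (X - 1) (clear the lowest set bit). A scalar sketch of both identities, assuming unsigned 32-bit arithmetic so the negation is well defined:

#include <cstdint>
#include <cstdio>

static uint32_t blsi(uint32_t x) { return x & (0u - x); } // isolate lowest set bit
static uint32_t blsr(uint32_t x) { return x & (x - 1);  } // clear lowest set bit

int main() {
  uint32_t x = 0x2C; // 0b101100
  std::printf("blsi=%#x blsr=%#x\n", blsi(x), blsr(x)); // blsi=0x4 blsr=0x28
}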
@@ -13353,98 +14076,87 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return R;
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
- return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
- if (VT == MVT::v2i64) {
- // Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::ANDNP)
- std::swap(N0, N1);
- // or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
- SDValue Mask = N1.getOperand(0);
- SDValue X = N1.getOperand(1);
- SDValue Y;
- if (N0.getOperand(0) == Mask)
- Y = N0.getOperand(1);
- if (N0.getOperand(1) == Mask)
- Y = N0.getOperand(0);
-
- // Check to see if the mask appeared in both the AND and ANDNP and
- if (!Y.getNode())
- return SDValue();
-
- // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
- if (Mask.getOpcode() != ISD::BITCAST ||
- X.getOpcode() != ISD::BITCAST ||
- Y.getOpcode() != ISD::BITCAST)
- return SDValue();
-
- // Look through mask bitcast.
- Mask = Mask.getOperand(0);
- EVT MaskVT = Mask.getValueType();
-
- // Validate that the Mask operand is a vector sra node. The sra node
- // will be an intrinsic.
- if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
- return SDValue();
-
- // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
- // there is no psrai.b
- switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- break;
- default: return SDValue();
- }
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(2);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
- unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
- if ((SraAmt + 1) != EltBits)
- return SDValue();
+ if (VT == MVT::v2i64 || VT == MVT::v4i64) {
+ if (!Subtarget->hasSSSE3() ||
+ (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ return SDValue();
- DebugLoc DL = N->getDebugLoc();
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, y), (pandn m, x))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and ANDNP.
+ if (!Y.getNode())
+ return SDValue();
- // Now we know we at least have a plendvb with the mask val. See if
- // we can form a psignb/w/d.
- // psign = x.type == y.type == mask.type && y = sub(0, x);
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ // Look through mask bitcast.
+ if (Mask.getOpcode() == ISD::BITCAST)
+ Mask = Mask.getOperand(0);
+ if (X.getOpcode() == ISD::BITCAST)
X = X.getOperand(0);
+ if (Y.getOpcode() == ISD::BITCAST)
Y = Y.getOperand(0);
- if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
- ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
- X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
- unsigned Opc = 0;
- switch (EltBits) {
- case 8: Opc = X86ISD::PSIGNB; break;
- case 16: Opc = X86ISD::PSIGNW; break;
- case 32: Opc = X86ISD::PSIGND; break;
- default: break;
- }
- if (Opc) {
- SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
- }
- }
- // PBLENDVB only available on SSE 4.1
- if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
- return SDValue();
-
- X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
- Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
- Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node.
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ if (Mask.getOpcode() != X86ISD::VSRAI)
+ return SDValue();
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(1);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Unsupported VT for PSIGN");
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+
+ X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
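For context only, not part of the patch: the or(and(mask, y), andn(mask, x)) pattern matched earlier in this hunk, with a mask produced by an arithmetic shift of EltBits-1 (all sign bits), is a per-element select between x and y; when y is sub(0, x) that select is what PSIGNB/W/D computes for a non-zero mask element, which is why a single X86ISD::PSIGN node can replace it. A scalar model of the equivalence, assuming 8-bit lanes, a sign-propagating right shift, and skipping the m == 0 lane where PSIGN zeroes the result (the names psignb and the loop bounds are invented for illustration):

#include <cassert>
#include <cstdint>

// Scalar model of one PSIGNB lane: copy, zero, or negate x depending on m.
static int8_t psignb(int8_t x, int8_t m) {
  if (m > 0) return x;
  if (m == 0) return 0;
  return (int8_t)(0 - (uint8_t)x);             // negate with wraparound, like the ISA
}

int main() {
  for (int xi = -128; xi <= 127; ++xi)
    for (int mi = -128; mi <= 127; ++mi) {
      if (mi == 0) continue;                   // mask lanes here are never zero
      int8_t x = (int8_t)xi, m = (int8_t)mi;
      int8_t y = (int8_t)(0 - (uint8_t)x);     // y = sub(0, x)
      int8_t mask = (int8_t)(m >> 7);          // VSRAI by 7: 0x00 or 0xFF per lane
      // or(and(mask, y), andn(mask, x)) is a select of y where mask is set.
      int8_t blended = (int8_t)((mask & y) | (~mask & x));
      assert(blended == psignb(x, m));
    }
  return 0;
}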
@@ -13500,6 +14212,36 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions");
+
+ // Create BLSMSK instructions by finding X ^ (X-1)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1);
+
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0);
+
+ return SDValue();
+}
+
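For illustration, not part of the patch: PerformXorCombine looks for X ^ (X - 1), which is exactly what BMI's BLSMSK computes, a mask of all bits up to and including the lowest set bit of X. A one-line sketch, assuming unsigned 32-bit values:

#include <cstdint>
#include <cstdio>

static uint32_t blsmsk(uint32_t x) { return x ^ (x - 1); } // mask up to lowest set bit

int main() {
  std::printf("%#x\n", blsmsk(0x28)); // 0xf: bits 0..3, lowest set bit of 0x28 is bit 3
}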
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -13515,7 +14257,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// shuffle. We need SSE4 for the shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ if (RegVT.isVector() && RegVT.isInteger() &&
+ Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -13553,7 +14296,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->getAlignment());
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
// Insert the word loaded into a vector.
SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -13561,7 +14305,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT,
+ ScalarInVector);
unsigned SizeRatio = RegSz/MemSz;
// Redistribute the loaded elements into the different locations.
@@ -13593,7 +14338,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatination of two XMM registers, perform two stores.
+ // If we are saving a concatenation of two XMM registers, perform two stores.
// This is better on Sandy Bridge because one 256-bit mem op is done via two
// 128-bit ones. If in the future the cost becomes only one memory access the
// first version would be better.
@@ -13703,8 +14448,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
- && Subtarget->hasXMMInt();
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
@@ -13722,7 +14467,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
- for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
@@ -13749,7 +14494,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->getAlignment());
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
@@ -13770,10 +14516,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
@@ -13817,7 +14564,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
@@ -13833,7 +14580,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
return false;
EVT VT = LHS.getValueType();
- unsigned N = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -13842,34 +14600,36 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
- SmallVector<int, 8> LMask(N);
+ SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
B = LHS.getOperand(1);
- cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 8> RMask(N);
+ SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
D = RHS.getOperand(1);
- cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
@@ -13884,30 +14644,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- for (unsigned i = 0; i != N; ++i) {
- unsigned Idx = RMask[i];
- if (Idx < N)
- RMask[i] += N;
- else if (Idx < 2*N)
- RMask[i] -= N;
- }
+ CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned i = 0; i != N; ++i) {
- unsigned LIdx = LMask[i], RIdx = RMask[i];
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
// Ignore any UNDEF components.
- if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
- || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
- !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
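For context only, not part of the patch: the comment above this function describes the shuffle pattern isHorizontalBinOp matches. With A = <a0..a3> and B = <b0..b3>, HADDPS produces <a0+a1, a2+a3, b0+b1, b2+b3>, and the rewritten index check now applies that pattern per 128-bit lane so 256-bit AVX vectors also qualify. A scalar model of the 128-bit case, assuming 4-float vectors and an invented helper name:

#include <array>
#include <cstdio>

// Scalar model of HADDPS on 128-bit vectors:
// result = < a0+a1, a2+a3, b0+b1, b2+b3 >
static std::array<float, 4> haddps(const std::array<float, 4> &a,
                                   const std::array<float, 4> &b) {
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}

int main() {
  std::array<float, 4> a = {1, 2, 3, 4}, b = {10, 20, 30, 40};
  for (float f : haddps(a, b)) std::printf("%g ", f); // 3 7 30 70
  std::printf("\n");
}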
@@ -13924,8 +14682,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -13939,8 +14697,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14006,7 +14764,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ // Optimize vectors in AVX mode:
+ // sign extend v8i16 to v8i32 and v4i32 to v4i64.
+ //
+ // Divide the input vector into two parts
+ // (for v4i32 the shuffle masks will be { 0, 1, -1, -1 } and { 2, 3, -1, -1 }),
+ // use a vpmovsx instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32,
+ // then concat the vectors back to the original VT.
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
+ (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
+
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask1.data());
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask2.data());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ }
+ return SDValue();
+}
+
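As a rough scalar model (not part of the patch) of the invariant this combine relies on: sign-extending the two halves of a v4i32 input separately and concatenating the results must agree with one full-width sign extension. The input values below are arbitrary illustrative data.

#include <cstdint>
#include <cstdio>

int main() {
  int32_t in[4] = {-1, 2, -300000, 7};     // hypothetical v4i32 input
  int64_t lo[2], hi[2], out[4];
  for (int i = 0; i < 2; ++i) lo[i] = (int64_t)in[i];      // sext of the low half
  for (int i = 0; i < 2; ++i) hi[i] = (int64_t)in[i + 2];  // sext of the high half
  for (int i = 0; i < 2; ++i) { out[i] = lo[i]; out[i + 2] = hi[i]; }  // concat
  for (int i = 0; i < 4; ++i)
    std::printf("out[%d] = %lld, full-width sext = %lld\n",
                i, (long long)out[i], (long long)(int64_t)in[i]);
  return 0;
}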
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
@@ -14014,6 +14823,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
@@ -14028,6 +14839,37 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, VT));
}
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for the 2 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for the 2 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+ if (Subtarget->hasAVX()) {
+
+ if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
+ ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
+
+ SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
+ SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG);
+ SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG);
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ }
+ }
+
return SDValue();
}
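A standalone illustration (not part of the patch) of why unpacking against a zero vector performs a zero extension; it assumes the little-endian layout used by x86. Interleaving each narrow element with a zero element and reinterpreting the bytes as the wider element type reproduces zext, which is what the vpunpckl/vpunpckh pair plus the bitcasts above rely on.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint16_t in[2] = {0xFFFF, 0x0042};               // hypothetical low v2i16 slice of the input
  uint16_t interleaved[4] = {in[0], 0, in[1], 0};  // what unpckl against a zero vector produces
  uint32_t out[2];
  std::memcpy(out, interleaved, sizeof(out));      // reinterpret as the wider element type
  std::printf("%u %u\n", out[0], out[1]);          // prints 65535 66, i.e. zext of the inputs
  return 0;
}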
@@ -14136,7 +14978,24 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
-static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+/// PerformAddCombine - Do target-specific dag combines on integer adds.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -14158,6 +15017,13 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ // Try to synthesize horizontal subs from subs of shuffles.
+ EVT VT = N->getValueType(0);
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
+
return OptimizeConditionalInDecrement(N, DAG);
}
@@ -14167,19 +15033,20 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
- case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
- case ISD::SUB: return PerformSubCombine(N, DAG);
+ case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
+ case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
@@ -14190,27 +15057,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
- case X86ISD::SHUFPS: // Handle all target specific shuffles
- case X86ISD::SHUFPD:
+ case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -14218,11 +15072,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
@@ -14330,11 +15181,38 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
+namespace {
+ // Helper to match a string against a sequence of pieces separated by whitespace.
+ bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
+ s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
+
+ for (unsigned i = 0, e = args.size(); i != e; ++i) {
+ StringRef piece(*args[i]);
+ if (!s.startswith(piece)) // Check if the piece matches.
+ return false;
+
+ s = s.substr(piece.size());
+ StringRef::size_type pos = s.find_first_not_of(" \t");
+ if (pos == 0) // We matched a prefix.
+ return false;
+
+ s = s.substr(pos);
+ }
+
+ return s.empty();
+ }
+ const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+}
+
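For reference, a small self-contained model (hypothetical, not LLVM code) of the matching behaviour implemented by matchAsmImpl above: each piece must appear in order, separated by at least one space or tab, with nothing left over at the end.

#include <cassert>
#include <string>
#include <vector>

// matchAsmModel is a hypothetical stand-in for matchAsm/matchAsmImpl.
static bool matchAsmModel(std::string s, const std::vector<std::string> &pieces) {
  s.erase(0, s.find_first_not_of(" \t"));               // skip leading whitespace
  for (const std::string &p : pieces) {
    if (s.compare(0, p.size(), p) != 0) return false;   // piece must be a prefix
    s.erase(0, p.size());
    std::string::size_type pos = s.find_first_not_of(" \t");
    if (pos == 0) return false;                         // require a separator after the piece
    s.erase(0, pos);                                    // pos == npos erases the rest
  }
  return s.empty();
}

int main() {
  assert(matchAsmModel("  bswap $0", {"bswap", "$0"}));
  assert(!matchAsmModel("bswapl $0", {"bswap", "$0"}));  // prefix match without a separator fails
  assert(matchAsmModel("rorw $$8, ${0:w}", {"rorw", "$$8,", "${0:w}"}));
  return 0;
}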
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
@@ -14342,35 +15220,27 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
switch (AsmPieces.size()) {
default: return false;
case 1:
- AsmStr = AsmPieces[0];
- AsmPieces.clear();
- SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
-
// FIXME: this should verify that we are targeting a 486 or better. If not,
- // we will turn this bswap into something that will be lowered to logical ops
- // instead of emitting the bswap asm. For now, we don't support 486 or lower
- // so don't worry about this.
+ // we will turn this bswap into something that will be lowered to logical
+ // ops instead of emitting the bswap asm. For now, we don't support 486 or
+ // lower so don't worry about this.
// bswap $0
- if (AsmPieces.size() == 2 &&
- (AsmPieces[0] == "bswap" ||
- AsmPieces[0] == "bswapq" ||
- AsmPieces[0] == "bswapl") &&
- (AsmPieces[1] == "$0" ||
- AsmPieces[1] == "${0:q}")) {
+ if (matchAsm(AsmPieces[0], "bswap", "$0") ||
+ matchAsm(AsmPieces[0], "bswapl", "$0") ||
+ matchAsm(AsmPieces[0], "bswapq", "$0") ||
+ matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
return IntrinsicLowering::LowerToByteSwap(CI);
}
+
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
- AsmPieces.size() == 3 &&
- (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
- AsmPieces[1] == "$$8," &&
- AsmPieces[2] == "${0:w}" &&
- IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
+ matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -14379,46 +15249,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
- IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
- SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
- Words[2] == "${0:w}") {
- Words.clear();
- SplitString(AsmPieces[1], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
- Words[2] == "$0") {
- Words.clear();
- SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
- Words[2] == "${0:w}") {
- AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
- SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
- }
- }
- }
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
+ matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
+ matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
@@ -14427,23 +15277,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
- Words.clear();
- SplitString(AsmPieces[1], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
- Words.clear();
- SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
- Words[2] == "%edx") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
- }
- }
+ if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
+ matchAsm(AsmPieces[1], "bswap", "%edx") &&
+ matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
@@ -14538,7 +15375,8 @@ TargetLowering::ConstraintWeight
break;
case 'x':
case 'Y':
- if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
weight = CW_Register;
break;
case 'I':
@@ -14608,9 +15446,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasXMMInt())
+ if (Subtarget->hasSSE2())
return "Y";
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
return "x";
}
@@ -14816,10 +15654,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasXMMInt()) break;
+ if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
- case 'x': // SSE_REGS if SSE1 allowed
- if (!Subtarget->hasXMM()) break;
+ case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
+ if (!Subtarget->hasSSE1()) break;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
@@ -14838,6 +15676,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(0U, X86::VR128RegisterClass);
+ // AVX types.
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ return std::make_pair(0U, X86::VR256RegisterClass);
+
}
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 342a5e617545..4e0073365a73 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -172,12 +172,23 @@ namespace llvm {
/// ANDNP - Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGNB/W/D - Copy integer sign.
- PSIGNB, PSIGNW, PSIGND,
+ /// PSIGN - Copy integer sign.
+ PSIGN,
- /// BLEND family of opcodes
+ /// BLENDV - Blend where the selector is an XMM.
BLENDV,
+ /// BLENDxx - Blend where the selector is an immediate.
+ BLENDPW,
+ BLENDPS,
+ BLENDPD,
+
+ /// HADD - Integer horizontal add.
+ HADD,
+
+ /// HSUB - Integer horizontal sub.
+ HSUB,
+
/// FHADD - Floating point horizontal add.
FHADD,
@@ -213,16 +224,26 @@ namespace llvm {
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,
- // VSHL, VSRL - Vector logical left / right shift.
- VSHL, VSRL,
+ // VSEXT_MOVL - Vector move low and sign extend.
+ VSEXT_MOVL,
+
+ // VSHL, VSRL - 128-bit vector logical left / right shift
+ VSHLDQ, VSRLDQ,
+
+ // VSHL, VSRL, VSRA - Vector shift elements
+ VSHL, VSRL, VSRA,
- // CMPPD, CMPPS - Vector double/float comparison.
- CMPPD, CMPPS,
+ // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ VSHLI, VSRLI, VSRAI,
+
+ // CMPP - Vector packed double/float comparison.
+ CMPP,
// PCMP* - Vector integer comparisons.
- PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
- PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+ PCMPEQ, PCMPGT,
+
+ // VPCOM, VPCOMU - XOP Vector integer comparisons.
+ VPCOM, VPCOMU,
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
@@ -230,6 +251,10 @@ namespace llvm {
ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+ BLSI, // BLSI - Extract lowest set isolated bit
+ BLSMSK, // BLSMSK - Get mask up to lowest set bit
+ BLSR, // BLSR - Reset lowest set bit
+
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
@@ -246,46 +271,26 @@ namespace llvm {
PSHUFD,
PSHUFHW,
PSHUFLW,
- PSHUFHW_LD,
- PSHUFLW_LD,
- SHUFPD,
- SHUFPS,
+ SHUFP,
MOVDDUP,
MOVSHDUP,
MOVSLDUP,
- MOVSHDUP_LD,
- MOVSLDUP_LD,
MOVLHPS,
MOVLHPD,
MOVHLPS,
- MOVHLPD,
MOVLPS,
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- VUNPCKLPSY,
- VUNPCKLPDY,
- UNPCKHPS,
- UNPCKHPD,
- VUNPCKHPSY,
- VUNPCKHPDY,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
- VPERMILPS,
- VPERMILPSY,
- VPERMILPD,
- VPERMILPDY,
- VPERM2F128,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERM2X128,
VBROADCAST,
+ // PMULUDQ - Vector multiply packed unsigned doubleword integers
+ PMULUDQ,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -299,6 +304,9 @@ namespace llvm {
// falls back to heap allocation if not.
SEG_ALLOCA,
+ // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+ WIN_FTOL,
+
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -368,75 +376,6 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace X86 {
- /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFDMask(ShuffleVectorSDNode *N);
-
- /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFHWMask(ShuffleVectorSDNode *N);
-
- /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFLWMask(ShuffleVectorSDNode *N);
-
- /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to SHUFP*.
- bool isSHUFPMask(ShuffleVectorSDNode *N);
-
- /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
- bool isMOVHLPSMask(ShuffleVectorSDNode *N);
-
- /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
- /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
- /// <2, 3, 2, 3>
- bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
- bool isMOVLPMask(ShuffleVectorSDNode *N);
-
- /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
- /// as well as MOVLHPS.
- bool isMOVLHPSMask(ShuffleVectorSDNode *N);
-
- /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to UNPCKL.
- bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
- /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to UNPCKH.
- bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
- /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
- /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
- /// <0, 0, 1, 1>
- bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
- /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
- /// <2, 2, 3, 3>
- bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSS,
- /// MOVSD, and MOVD, i.e. setting the lowest element.
- bool isMOVLMask(ShuffleVectorSDNode *N);
-
- /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
- bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
- /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
- bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
- /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
- bool isMOVDDUPMask(ShuffleVectorSDNode *N);
-
/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
@@ -447,23 +386,6 @@ namespace llvm {
/// suitable for input to VINSERTF128.
bool isVINSERTF128Index(SDNode *N);
- /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
- /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
- /// instructions.
- unsigned getShuffleSHUFImmediate(SDNode *N);
-
- /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
- unsigned getShufflePSHUFHWImmediate(SDNode *N);
-
- /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
- unsigned getShufflePSHUFLWImmediate(SDNode *N);
-
- /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
- unsigned getShufflePALIGNRImmediate(SDNode *N);
-
/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
@@ -529,7 +451,7 @@ namespace llvm {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
- /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -537,7 +459,7 @@ namespace llvm {
/// target-independent logic.
virtual EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, bool MemcpyStrSrc,
+ bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
@@ -587,7 +509,6 @@ namespace llvm {
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -697,6 +618,18 @@ namespace llvm {
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
+ /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
+ /// for fptoui.
+ bool isTargetFTOL() const {
+ return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
+ }
+
+ /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
+ /// used for fptoui to the given type.
+ bool isIntegerTypeFTOL(EVT VT) const {
+ return isTargetFTOL() && VT == MVT::i64;
+ }
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
@@ -779,7 +712,8 @@ namespace llvm {
SelectionDAG &DAG) const;
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool isSigned) const;
+ bool isSigned,
+ bool isReplace) const;
SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const;
@@ -833,6 +767,7 @@ namespace llvm {
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
@@ -846,9 +781,12 @@ namespace llvm {
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const;
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -857,8 +795,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -872,7 +810,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -916,7 +854,7 @@ namespace llvm {
unsigned cxchgOpc,
unsigned notOpc,
unsigned EAXreg,
- TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
bool invSrc = false) const;
MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index dd4f6a5a85a4..54b91c3edb8b 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -1,4 +1,4 @@
-//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 74b647a4f6b1..0eee08339384 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1,10 +1,10 @@
-//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
-//
+//===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the integer arithmetic instructions in the X86
@@ -18,22 +18,24 @@
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+ "lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins i32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ Requires<[In32BitMode]>;
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+ [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ Requires<[In64BitMode]>;
let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
- [(set GR64:$dst, lea64addr:$src)]>;
+ [(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
@@ -51,21 +53,23 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
+ (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*GR16
+ [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src", // EAX,EDX = EAX*GR32
- [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
+ IIC_MUL32_REG>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
"mul{q}\t$src", // RAX,RDX = RAX*GR64
- [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
+ IIC_MUL64>;
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
@@ -74,50 +78,51 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+ (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*[mem16]
+ [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- []>; // EAX,EDX = EAX*[mem32]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+ [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
}
let neverHasSideEffects = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
+ IIC_IMUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
+ IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
- // RAX,RDX = RAX*GR64
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
+ IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
+ IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
+ // AX,DX = AX*[mem16]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
}
} // neverHasSideEffects
@@ -130,16 +135,19 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+ (X86smul_flag GR16:$src1, GR16:$src2))], IIC_IMUL16_RR>,
+ TB, OpSize;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+ (X86smul_flag GR32:$src1, GR32:$src2))], IIC_IMUL32_RR>,
+ TB;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+ (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
+ TB;
}
// Register-Memory Signed Integer Multiply
@@ -147,18 +155,23 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))],
+ IIC_IMUL16_RM>,
TB, OpSize;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+ (X86smul_flag GR32:$src1, (load addr:$src2)))],
+ IIC_IMUL32_RM>,
+ TB;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+ (X86smul_flag GR64:$src1, (load addr:$src2)))],
+ IIC_IMUL64_RM>,
+ TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -170,33 +183,39 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+ (X86smul_flag GR16:$src1, imm:$src2))],
+ IIC_IMUL16_RRI>, OpSize;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))],
+ IIC_IMUL16_RRI>,
OpSize;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>;
+ (X86smul_flag GR32:$src1, imm:$src2))],
+ IIC_IMUL32_RRI>;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))],
+ IIC_IMUL32_RRI>;
def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))],
+ IIC_IMUL64_RRI>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))],
+ IIC_IMUL64_RRI>;
// Memory-Integer Signed Integer Multiply
@@ -204,37 +223,43 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL16_RMI>,
OpSize;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
+ i16immSExt8:$src2))], IIC_IMUL16_RMI>,
+ OpSize;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>;
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL32_RMI>;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i32immSExt8:$src2))]>;
+ i32immSExt8:$src2))],
+ IIC_IMUL32_RMI>;
def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i64immSExt32:$src2))]>;
+ i64immSExt32:$src2))],
+ IIC_IMUL64_RMI>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i64immSExt8:$src2))]>;
+ i64immSExt8:$src2))],
+ IIC_IMUL64_RMI>;
} // Defs = [EFLAGS]
@@ -243,62 +268,62 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
+ "div{b}\t$src", [], IIC_DIV8_REG>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>;
+ "div{l}\t$src", [], IIC_DIV32>;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>;
+ "div{q}\t$src", [], IIC_DIV64>;
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>;
+ "div{l}\t$src", [], IIC_DIV32>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>;
+ "div{q}\t$src", [], IIC_DIV64>;
}
// Signed division/remainder.
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
+ "idiv{b}\t$src", [], IIC_IDIV8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>;
+ "idiv{l}\t$src", [], IIC_IDIV32>;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>;
-
-let mayLoad = 1, mayLoad = 1 in {
+ "idiv{q}\t$src", [], IIC_IDIV64>;
+
+let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>;
+ "idiv{b}\t$src", [], IIC_IDIV8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>;
+ "idiv{l}\t$src", [], IIC_IDIV32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>;
+ "idiv{q}\t$src", [], IIC_IDIV64>;
}
//===----------------------------------------------------------------------===//
@@ -312,35 +337,35 @@ let Constraints = "$src1 = $dst" in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"neg{w}\t$dst",
[(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize;
+ (implicit EFLAGS)], IIC_UNARY_REG>, OpSize;
def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"neg{l}\t$dst",
[(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
} // Constraints = "$src1 = $dst"
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
"neg{w}\t$dst",
[(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize;
def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
"neg{l}\t$dst",
[(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
} // Defs = [EFLAGS]
@@ -351,29 +376,30 @@ let Constraints = "$src1 = $dst" in {
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
"not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
+ [(set GR8:$dst, (not GR8:$src1))], IIC_UNARY_REG>;
def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+ [(set GR16:$dst, (not GR16:$src1))], IIC_UNARY_REG>, OpSize;
def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>;
+ [(set GR32:$dst, (not GR32:$src1))], IIC_UNARY_REG>;
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
- [(set GR64:$dst, (not GR64:$src1))]>;
+ [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
} // Constraints = "$src1 = $dst"
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
"not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ [(store (not (loadi16 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>,
+ OpSize;
def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
"not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
- [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
@@ -382,19 +408,22 @@ let Constraints = "$src1 = $dst" in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))],
+ IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In32BitMode]>;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))],
+ IIC_UNARY_REG>;
} // isConvertibleToThreeAddress = 1, CodeSize = 1
@@ -403,19 +432,23 @@ let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In64BitMode]>;
def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
@@ -424,37 +457,37 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
let CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In32BitMode]>;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In32BitMode]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In64BitMode]>;
def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
} // CodeSize = 2
@@ -462,18 +495,22 @@ let Constraints = "$src1 = $dst" in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
+ IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In32BitMode]>;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
+ IIC_UNARY_REG>;
} // CodeSize = 2
} // Constraints = "$src1 = $dst"
@@ -481,18 +518,18 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
let CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In32BitMode]>;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In32BitMode]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
} // CodeSize = 2
} // Defs = [EFLAGS]
@@ -588,11 +625,13 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
- string mnemonic, string args, list<dag> pattern>
+ string mnemonic, string args, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
f, outs, ins,
- !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
+ itin> {
// Infer instruction prefixes from type info.
let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
@@ -601,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
// BinOpRR - Instructions like "add reg, reg, reg".
class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ dag outlist, list<dag> pattern, InstrItinClass itin,
+ Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -612,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
// just a EFLAGS as a result.
@@ -621,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs),
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
- f>;
+ IIC_BIN_NONMEM, f>;
// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result.
@@ -629,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result, and has EFLAGS as input.
@@ -638,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_NONMEM>;
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -654,7 +696,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -664,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -700,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -724,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -738,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -776,7 +817,7 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -804,7 +845,7 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -815,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
-
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -837,7 +877,7 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -1150,7 +1190,7 @@ let Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", []>;
+ "", [], IIC_BIN_NONMEM>;
}
//===----------------------------------------------------------------------===//
@@ -1160,14 +1200,39 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
+ IIC_BIN_NONMEM>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>;
+ (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// MULX Instruction
+//
+multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ let isCommutable = 1 in
+ def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V;
+
+ let mayLoad = 1 in
+ def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ let Uses = [EDX] in
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>;
+ let Uses = [RDX] in
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
+}
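The bmi_mulx definitions above only describe encodings (the pattern lists are empty), so as a point of reference, here is a hedged C++ sketch of the arithmetic MULX performs: a widening unsigned multiply of the implicit (E/R)DX operand with the explicit source, one destination receiving the high half of the product and the other the low half, with EFLAGS left untouched. The helper name is illustrative and not part of the patch.

#include <cstdint>

// Illustrative only: the widening unsigned multiply modeled by MULX32rr/rm.
// EFLAGS are not modified by these instructions, which is why the defs above
// carry no EFLAGS clobber.
static void mulx32_like(uint32_t edx, uint32_t src,
                        uint32_t &hi, uint32_t &lo) {
  uint64_t product = static_cast<uint64_t>(edx) * src; // full 64-bit product
  hi = static_cast<uint32_t>(product >> 32);
  lo = static_cast<uint32_t>(product);
}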
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 0245e5c09644..fa1d67644db7 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 3a43b22ddf3d..adeaf5410dcc 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -1,10 +1,10 @@
-//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
-//
+//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
@@ -21,17 +21,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
+ IIC_CMOV16_RR>,TB,OpSize;
def #NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
def #NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
- (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
@@ -39,17 +42,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- CondNode, EFLAGS))]>, TB, OpSize;
+ CondNode, EFLAGS))], IIC_CMOV16_RM>,
+ TB, OpSize;
def #NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- CondNode, EFLAGS))]>, TB;
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
def #NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- CondNode, EFLAGS))]>, TB;
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
@@ -78,10 +82,12 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
- [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
+ IIC_SET_R>, TB;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
- [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)],
+ IIC_SET_M>, TB;
} // Uses = [EFLAGS]
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index da28690672a6..6f9e8492613a 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -112,23 +112,39 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca for segmented stacks",
[(set GR32:$dst,
(X86SegAlloca GR32:$size))]>,
Requires<[In32BitMode]>;
-let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
"# variable sized alloca for segmented stacks",
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
-
}
+// The MSVC runtime contains an _ftol2 routine for converting floating-point
+// to integer values. It has a strange calling convention: the input is
+// popped from the x87 stack, and the return value is given in EDX:EAX. No
+// other registers (aside from flags) are touched.
+// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
+// variant is unnecessary.
+
+let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
+ def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP32:$src)]>,
+ Requires<[In32BitMode]>;
+ def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP64:$src)]>,
+ Requires<[In32BitMode]>;
+}
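As a value-level illustration of what the WIN_FTOL_* pseudos ultimately compute (the register-level protocol of _ftol2, with the input popped from the x87 stack and the result in EDX:EAX, is described in the comment above and is not visible at this level), a minimal C++ sketch of the truncating "fptoui" conversion follows; the function name is hypothetical.

#include <cstdint>

// Minimal sketch of the conversion the "win32 fptoui" pseudos model: a
// truncating floating-point to 64-bit unsigned integer conversion. The
// result is only meaningful when the value fits in the destination type.
static uint64_t win_ftol_like(double x) {
  return static_cast<uint64_t>(x); // truncation toward zero
}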
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
@@ -137,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
+ [(X86ehret GR32:$addr)], IIC_RET>;
}
@@ -145,8 +161,26 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
+ [(X86ehret GR64:$addr)], IIC_RET>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions used by segmented stacks.
+//
+// This is lowered into a RET instruction by MCInstLower. We need
+// this so that we don't have to have a MachineBasicBlock which ends
+// with a RET and also has successors.
+let isPseudo = 1 in {
+def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
+ "", []>;
+
+// This instruction is lowered to a RET followed by a MOV. The two
+// instructions are not generated on a higher level since then the
+// verifier sees a MachineBasicBlock ending with a non-terminator.
+def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
+ "", []>;
}
//===----------------------------------------------------------------------===//
@@ -159,7 +193,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -168,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)]>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -184,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -192,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
+ "", [(set GR64:$dst, i64immZExt32:$src)],
+ IIC_ALU_NONMEM>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -202,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
// X86CodeEmitter.
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>,
OpSize;
def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
} // isCodeGenOnly
@@ -262,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
// String Pseudo Instructions
//
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
}
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,ECX,EDI] in
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+ let Uses = [AX,ECX,EDI] in
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+ let Uses = [EAX,ECX,EDI] in
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+}
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,RCX,RDI] in
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+ let Uses = [AX,RCX,RDI] in
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+}
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -533,26 +605,17 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
// Memory barriers
// TODO: Get this to fold the constant into the instruction.
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
[(X86MemBarrier)]>;
-// TODO: Get this to fold the constant into the instruction.
-let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
-def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
- "lock\n\t"
- "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
- [(X86MemBarrierNoSSE GR64:$zero)]>,
- Requires<[In64BitMode]>, LOCK;
-
-
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
// ImmOpc8 corresponds to the mi8 version of the instruction
@@ -566,72 +629,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, OpSize, LOCK;
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
}
@@ -648,29 +711,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
+ "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
+ "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
+ "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
+ "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
+ "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
+ "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
+ "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
+ "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
}
// Atomic compare and swap.
@@ -679,42 +742,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
+ [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
isCodeGenOnly = 1 in
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
"lock\n\t"
"cmpxchg16b\t$ptr",
- [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
Requires<[HasCmpxchg16b]>;
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
"cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
}
let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
"lock\n\t"
"cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+ [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
}
let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
"lock\n\t"
"cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
}
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
"cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
}
// Atomic exchange and add
@@ -722,22 +785,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
"lock\n\t"
"xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+ IIC_XADD_LOCK_MEM8>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
"lock\n\t"
"xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, OpSize, LOCK;
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
"lock\n\t"
"xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
"xadd{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
}
@@ -936,14 +1003,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+ (CALL64pcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+ (CALL64pcrel32 texternalsym:$dst)>;
// tailcall stuff
def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
@@ -1105,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
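The or_is_add fragment above uses masked-bits information to prove that the two OR operands have no set bits in common; in that case there can be no carries, so the OR can be rewritten as an ADD. A small, self-contained C++ illustration of that identity follows; the helper name is made up for the example.

#include <cassert>
#include <cstdint>

// Sufficient condition checked (conservatively) by or_is_add: the operands
// share no set bits, so addition produces no carries and equals bitwise OR.
static bool disjoint_bits(uint32_t a, uint32_t b) { return (a & b) == 0; }

int main() {
  uint32_t a = 0x00F0, b = 0x010F;
  if (disjoint_bits(a, b))
    assert((a | b) == (a + b)); // both evaluate to 0x1FF
  return 0;
}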
@@ -1440,58 +1500,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+// Helper imms that check if a mask doesn't change significant shift bits.
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
+def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
+
// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
+def : Pat<(shl GR8:$src1, (and CL, immShift32)),
(SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
+def : Pat<(shl GR16:$src1, (and CL, immShift32)),
(SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
+def : Pat<(shl GR32:$src1, (and CL, immShift32)),
(SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL32mCL addr:$dst)>;
-def : Pat<(srl GR8:$src1, (and CL, 31)),
+def : Pat<(srl GR8:$src1, (and CL, immShift32)),
(SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
+def : Pat<(srl GR16:$src1, (and CL, immShift32)),
(SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
+def : Pat<(srl GR32:$src1, (and CL, immShift32)),
(SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR32mCL addr:$dst)>;
-def : Pat<(sra GR8:$src1, (and CL, 31)),
+def : Pat<(sra GR8:$src1, (and CL, immShift32)),
(SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
+def : Pat<(sra GR16:$src1, (and CL, immShift32)),
(SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
+def : Pat<(sra GR32:$src1, (and CL, immShift32)),
(SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
+def : Pat<(shl GR64:$src1, (and CL, immShift64)),
(SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
-def : Pat<(srl GR64:$src1, (and CL, 63)),
+def : Pat<(srl GR64:$src1, (and CL, immShift64)),
(SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
-def : Pat<(sra GR64:$src1, (and CL, 63)),
+def : Pat<(sra GR64:$src1, (and CL, immShift64)),
(SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
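The immShift32/immShift64 helpers introduced above generalize the old "(and CL, 31)" and "(and CL, 63)" forms: any mask whose low five (respectively six) bits are all ones can be dropped, because the variable-shift hardware only consults the low five bits of CL for 8/16/32-bit operands and the low six bits for 64-bit operands. A hedged C++ restatement of the 32-bit predicate, under a hypothetical name:

#include <cstdint>

// Same condition as the immShift32 ImmLeaf above: the mask's low five bits
// are all ones, i.e. CountTrailingOnes_32(mask) >= 5. With such a mask the
// low five bits of (count & mask) equal the low five bits of count, and
// those are the only bits the hardware shift looks at, so the explicit
// 'and' can be folded away.
static bool mask_preserves_shift32(uint8_t mask) {
  return (mask & 0x1Fu) == 0x1Fu;
}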
@@ -1735,3 +1799,11 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
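The bit-scan patterns added above rely on BSF leaving its destination undefined when the source is zero, which is exactly the contract of cttz_zero_undef, so no zero check is needed. The GCC/Clang builtin below has the same contract and is shown only as an illustration.

#include <cstdint>

// Mirrors the cttz_zero_undef contract: the caller guarantees x != 0, so the
// count can lower straight to BSF with no zero guard.
static unsigned trailing_zeros_nonzero(uint32_t x) {
  return static_cast<unsigned>(__builtin_ctz(x)); // undefined for x == 0
}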
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index c228a0aed59c..bf11fdec5add 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -1,4 +1,4 @@
-//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,39 +20,47 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
- [(X86retflag 0)]>;
+ [(X86retflag 0)], IIC_RET>;
+ def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret{w}",
+ [], IIC_RET>, OpSize;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
- [(X86retflag timm:$amt)]>;
+ [(X86retflag timm:$amt)], IIC_RET_IMM>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "retw\t$amt",
- []>, OpSize;
+ "ret{w}\t$amt",
+ [], IIC_RET_IMM>, OpSize;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
- "lretl", []>;
+ "{l}ret{l|f}", [], IIC_RET>;
+ def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{w|f}", [], IIC_RET>, OpSize;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
- "lretq", []>;
+ "{l}ret{q|f}", [], IIC_RET>;
def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", []>;
+ "{l}ret{l|f}\t$amt", [], IIC_RET>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lretw\t$amt", []>, OpSize;
+ "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)]>;
+ "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", []>;
+ "jmp\t$dst", [], IIC_JMP_REL>;
+  // FIXME: Intel syntax for JMP64pcrel32 such that it is not ambiguous
+  // with JMP_1.
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp{q}\t$dst", []>;
+ "jmpq\t$dst", [], IIC_JMP_REL>;
}
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
+ IIC_Jcc>;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB;
}
}
@@ -74,61 +82,61 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1 in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>;
let Uses = [ECX] in
def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+ "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>;
// J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
// In 64-bit mode, the address size prefix is jecxz and the unprefixed version
// is jrcxz.
let Uses = [ECX] in
def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+ "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", []>;
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize;
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", []>;
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
}
// Loop instructions
-def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -138,32 +146,30 @@ let isCall = 1 in
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
+ let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst,variable_ops),
- "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
- "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
Requires<[In32BitMode]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
Requires<[In32BitMode]>;
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ "lcall{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>, OpSize;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+ "lcall{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize;
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", []>;
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
// callw for 16 bit code for the assembler.
let isAsmParserOnly = 1 in
@@ -177,11 +183,7 @@ let isCall = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
+ let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
def TCRETURNri : PseudoI<(outs),
@@ -194,74 +196,43 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// mcinst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL",
- []>;
+ "jmp\t$dst # TAILCALL",
+ [], IIC_JMP_REL>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "", []>; // FIXME: Remove encoding when JIT is dead.
+ "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL", []>;
+ "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
-
- // NOTE: this pattern doesn't match "X86call imm", because we do not know
- // that the offset between an arbitrary immediate and the call will fit in
- // the 32-bit pcrel field that we have.
- def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
- Requires<[In64BitMode, NotWin64]>;
- def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
- "lcall{q}\t{*}$dst", []>;
- }
+// RSP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1, Uses = [RSP] in {
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)],
+ IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In64BitMode]>;
- // FIXME: We need to teach codegen about single list of call-clobbered
- // registers.
-let isCall = 1, isCodeGenOnly = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP] in {
- def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[IsWin64]>;
- def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst",
- [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
- def WINCALL64m : I<0xFF, MRM2m, (outs),
- (ins i64mem:$dst,variable_ops),
- "call{q}\t{*}$dst",
- [(X86call (loadi64 addr:$dst))]>,
- Requires<[IsWin64]>;
- }
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+}
let isCall = 1, isCodeGenOnly = 1 in
// __chkstk(MSVC): clobber R10, R11 and EFLAGS.
@@ -270,18 +241,13 @@ let isCall = 1, isCodeGenOnly = 1 in
Uses = [RSP] in {
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
+ "call{q}\t$dst", [], IIC_CALL_RI>,
Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP],
+ let Uses = [RSP],
usesCustomInserter = 1 in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
@@ -294,11 +260,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL", []>;
+ "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index e62e6b701f46..0d5490ad9cc1 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -1,10 +1,10 @@
-//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
-//
+//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
@@ -37,40 +37,47 @@ let neverHasSideEffects = 1 in {
}
+
// Sign/Zero extenders
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
+ TB, OpSize;
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
+ TB, OpSize;
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))]>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))]>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
+ TB, OpSize;
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
+ TB, OpSize;
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))]>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))]>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
+ TB;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -78,12 +85,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- []>, TB;
+ [], IIC_MOVZX>, TB;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- []>, TB;
+ [], IIC_MOVZX>, TB;
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
@@ -91,32 +98,38 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))]>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))]>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
// FIXME: These should be Pat patterns.
let isCodeGenOnly = 1 in {
@@ -124,15 +137,17 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
+ TB;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
+ IIC_MOVZX>, TB;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -142,10 +157,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))]>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
+ IIC_MOVZX>;
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773d2d69..d57937b2e1b7 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -1,4 +1,4 @@
-//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,7 @@
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
-multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -34,27 +34,187 @@ multiclass fma_rm<bits<8> opc, string OpcodeStr> {
[]>;
}
-multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpcodeStr, string PackTy> {
- defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
- defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
}
-let isAsmParserOnly = 1 in {
- // Fused Multiply-Add
- defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
- defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
- defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
- defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
- defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
- defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
- defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
- defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+// Fused Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+}
+
+// Fused Negative Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
+
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
- // Fused Negative Multiply-Add
- defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
- defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
- defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
- defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr> {
+ defm SSr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132ss"), f32mem>;
+ defm SSr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213ss"), f32mem>;
+ defm SSr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231ss"), f32mem>;
+ defm SDr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132sd"), f64mem>, VEX_W;
+ defm SDr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213sd"), f64mem>, VEX_W;
+ defm SDr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231sd"), f64mem>, VEX_W;
}
+
+defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">, VEX_LIG;
+defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">, VEX_LIG;
+
+defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">, VEX_LIG;
+defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">, VEX_LIG;
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+// For disassembler
+let isCodeGenOnly = 1 in
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256,
+ PatFrag ld_frag128, PatFrag ld_frag256> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2,
+ (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2,
+ (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+} // isCodeGenOnly = 1
+}
+
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma4_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfmadd_sd>;
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma4_vfmadd_ps,
+ int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd,
+ int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma4_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfmsub_sd>;
+defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps,
+ int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd,
+ int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma4_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfnmadd_sd>;
+defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps,
+ int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd,
+ int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma4_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfnmsub_sd>;
+defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps,
+ int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd,
+ int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps,
+ int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd,
+ int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps,
+ int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd,
+ int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
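
NOTE (editorial, not part of the patch): the X86InstrFMA.td rewrite above splits the old assembler-only fma_rm/fma_forms multiclasses into packed FMA3 (fma3p_*), scalar FMA3 (fma3s_*) and AMD 4-operand FMA4 (fma4s/fma4p) families; each defm at the bottom expands a multiclass into one record per operand encoding (rr, rm, mr, plus the isCodeGenOnly rr_REV forms kept for the disassembler). A stand-alone sketch of that expansion, using only hypothetical names, looks like this:

// Editorial sketch with hypothetical names (SketchInst, fma4s_sketch,
// VFMADDSS4_SKETCH); it only illustrates how defm multiplies records.
class SketchInst<bits<8> opc, string asm> {
  bits<8> Opcode = opc;
  string AsmString = asm;
}
multiclass fma4s_sketch<bits<8> opc, string OpcodeStr> {
  // Register, load-$src3 and load-$src2 forms, mirroring rr/rm/mr above.
  def rr : SketchInst<opc, !strconcat(OpcodeStr, "\t$dst, $src1, $src2, $src3")>;
  def rm : SketchInst<opc, !strconcat(OpcodeStr, "\t$dst, $src1, $src2, [mem]")>;
  def mr : SketchInst<opc, !strconcat(OpcodeStr, "\t$dst, $src1, [mem], $src3")>;
}
// Expands into VFMADDSS4_SKETCHrr, VFMADDSS4_SKETCHrm and VFMADDSS4_SKETCHmr.
defm VFMADDSS4_SKETCH : fma4s_sketch<0x6A, "vfmaddss">;

FMA3 keeps three explicit operands (the destination doubles as a source, hence VEX_4V and empty patterns for now), while the FMA4 multiclasses carry a fourth operand and attach the int_x86_fma4_* intrinsic patterns directly.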
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 7cb870fabd62..a13887e932b2 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -1,10 +1,10 @@
-//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
-//
+//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
@@ -225,22 +225,22 @@ class FPrST0PInst<bits<8> o, string asm>
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
-def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, ST(0)}">;
def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
-def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, ST(0)}">;
def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
-def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, ST(0)}">;
def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
-def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, ST(0)}">;
def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
-def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, ST(0)}">;
def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
-def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, ST(0)}">;
def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
@@ -330,21 +330,21 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmovb\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmovbe\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
- "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmove\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
- "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmovu\t {$op, %st(0)|ST(0), $op}">, DA;
def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnb\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnbe\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
- "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovne\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnu\t{$op, %st(0)|ST(0), $op}">, DB;
} // Predicates = [HasCMov]
// Floating point loads & stores.
@@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's a FPStack op.
+let Predicates = [HasSSE3] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
+} // Predicates = [HasSSE3]
let mayStore = 1 in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
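
NOTE (editorial, not part of the patch): the FISTTP hunk above replaces nine per-def Requires<[HasSSE3]> modifiers with one surrounding "let Predicates = [HasSSE3] in { ... }" block, so the predicate is stated once for the whole group. The let-block mechanism it relies on can be shown in isolation; everything below is hypothetical (string predicates stand in for the real Predicate records):

class SketchOp<string asm> {
  string AsmString = asm;
  list<string> Predicates = [];
}
// Per-def form: each record overrides the field itself.
def FIST_SKETCH_A : SketchOp<"fisttp a"> { let Predicates = ["HasSSE3"]; }
// Block form: one enclosing let applies the same override to every def inside.
let Predicates = ["HasSSE3"] in {
  def FIST_SKETCH_B : SketchOp<"fisttp b">;
  def FIST_SKETCH_C : SketchOp<"fisttp c">;
}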
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 0a1590b3e0a2..b3870906ab0a 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -1,10 +1,10 @@
-//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
-//
+//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -43,6 +43,15 @@ def RawFrmImm8 : Format<43>;
def RawFrmImm16 : Format<44>;
def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
+def MRM_D4 : Format<47>;
+def MRM_D8 : Format<48>;
+def MRM_D9 : Format<49>;
+def MRM_DA : Format<50>;
+def MRM_DB : Format<51>;
+def MRM_DC : Format<52>;
+def MRM_DD : Format<53>;
+def MRM_DE : Format<54>;
+def MRM_DF : Format<55>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -107,17 +116,25 @@ class T8 { bits<5> Prefix = 13; }
class TA { bits<5> Prefix = 14; }
class A6 { bits<5> Prefix = 15; }
class A7 { bits<5> Prefix = 16; }
-class TF { bits<5> Prefix = 17; }
+class T8XD { bits<5> Prefix = 17; }
+class T8XS { bits<5> Prefix = 18; }
+class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_4VOp3 : VEX { bit hasVEX_4VOp3Prefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class MemOp4 { bit hasMemOp4Prefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr, Domain d = GenericDomain>
+ string AsmStr,
+ InstrItinClass itin,
+ Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -133,6 +150,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+ let Itinerary = itin;
+
//
// Attributes specific to X86 instructions...
//
@@ -148,11 +167,15 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_4VOp3Prefix = 0; // Does this inst require the VEX.VVVV field to
+ // encode the third operand?
bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in a immediate field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -169,58 +192,63 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{33} = hasVEXPrefix;
let TSFlags{34} = hasVEX_WPrefix;
let TSFlags{35} = hasVEX_4VPrefix;
- let TSFlags{36} = hasVEX_i8ImmReg;
- let TSFlags{37} = hasVEX_L;
- let TSFlags{38} = ignoresVEX_L;
- let TSFlags{39} = has3DNow0F0FOpcode;
+ let TSFlags{36} = hasVEX_4VOp3Prefix;
+ let TSFlags{37} = hasVEX_i8ImmReg;
+ let TSFlags{38} = hasVEX_L;
+ let TSFlags{39} = ignoresVEX_L;
+ let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasMemOp4Prefix;
+ let TSFlags{42} = hasXOP_Prefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -231,8 +259,9 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
-class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
+ InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
let FPForm = fp;
let Pattern = pattern;
}
@@ -244,20 +273,23 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
-class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern> {
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
@@ -267,8 +299,8 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
// SIi8 - SSE 1 & 2 scalar instructions
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
@@ -278,8 +310,8 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
- Domain d>
- : I<o, F, outs, ins, asm, pattern, d> {
+ InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
@@ -289,8 +321,8 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d>
- : Ii8<o, F, outs, ins, asm, pattern, d> {
+ list<dag> pattern, InstrItinClass itin, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
!if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
@@ -306,25 +338,27 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// VSSI - SSE1 instructions with XS prefix in AVX form.
// VPSI - SSE1 instructions with TB prefix in AVX form.
-class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
@@ -337,28 +371,30 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// VSDI - SSE2 instructions with XD prefix in AVX form.
// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
-class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
-class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
@@ -368,15 +404,16 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[HasSSE3]>;
-class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE3]>;
@@ -386,16 +423,16 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
// SS3AI - SSSE3 instructions with TA prefix.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
-// uses the MMX registers. We put those instructions here because they better
-// fit into the SSSE3 instruction category rather than the MMX category.
+// uses the MMX registers. The 64-bit versions are grouped with the MMX
+// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
@@ -404,31 +441,31 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSE42]>;
-// SS42FI - SSE 4.2 instructions with TF prefix.
+// SS42FI - SSE 4.2 instructions with T8XD prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
-
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
+
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSE42]>;
// AVX Instruction Templates:
@@ -437,76 +474,115 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX8I - AVX instructions with T8 and OpSize prefix.
// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX]>;
+// AVX2 Instruction Templates:
+// Instructions introduced in AVX2 (no SSE equivalent forms)
+//
+// AVX28I - AVX2 instructions with T8 and OpSize prefix.
+// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX2]>;
+class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX2]>;
+
// AES Instruction Templates:
//
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
- Requires<[HasAES]>;
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasSSE2, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
- Requires<[HasAES]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[HasSSE2, HasAES]>;
// CLMUL Instruction Templates
class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
- OpSize, Requires<[HasCLMUL]>;
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, Requires<[HasSSE2, HasCLMUL]>;
class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
// X86-64 Instruction templates...
//
-class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
let Pattern = pattern;
let CodeSize = 3;
}
class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
// MMX Instruction templates
//
@@ -519,23 +595,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
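
NOTE (editorial, not part of the patch): the common thread through the X86InstrFormats.td changes above is the new InstrItinClass argument: every instruction template gains an "itin" parameter defaulted to IIC_DEFAULT and forwards it to X86Inst, which records it with "let Itinerary = itin;". Existing defs that name no itinerary keep building because of the default, while defs such as the MOVZX ones earlier in this patch pass IIC_MOVZX explicitly. A self-contained sketch of the pattern (all names hypothetical):

class SketchItin { }
def IIC_SKETCH_DEFAULT : SketchItin;
def IIC_SKETCH_MOVZX   : SketchItin;
// Base class stores the itinerary, analogous to "let Itinerary = itin;".
class SketchBase<string asm, SketchItin itin> {
  string AsmString = asm;
  SketchItin Itinerary = itin;
}
// Derived template defaults the argument so old call sites stay valid.
class SketchI<string asm, SketchItin itin = IIC_SKETCH_DEFAULT>
  : SketchBase<asm, itin>;
def OLD_STYLE_SKETCH : SketchI<"mov">;                     // picks up the default
def NEW_STYLE_SKETCH : SketchI<"movzx", IIC_SKETCH_MOVZX>; // names an itinerary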
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index af919fba8ee4..041a64f336f8 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1,10 +1,10 @@
-//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
+//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides pattern fragments useful for SIMD instructions.
@@ -41,24 +41,20 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
+def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignb : SDNode<"X86ISD::PSIGNB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignw : SDNode<"X86ISD::PSIGNW",
- SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignd : SDNode<"X86ISD::PSIGND",
- SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+def X86psign : SDNode<"X86ISD::PSIGN",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
@@ -75,20 +71,30 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
-def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
-def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
-def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
+def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
+def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+
+def X86vshl : SDNode<"X86ISD::VSHL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsrl : SDNode<"X86ISD::VSRL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsra : SDNode<"X86ISD::VSRA",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+
+def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
+def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
+def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
@@ -96,6 +102,17 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+def X86vpcom : SDNode<"X86ISD::VPCOM",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
+def X86vpcomu : SDNode<"X86ISD::VPCOMU",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
+
+def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
+
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
// Note: this is a work in progress...
@@ -109,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -116,8 +135,7 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
-def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
@@ -129,40 +147,23 @@ def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
-def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
-def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
-def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
-
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
-def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
-def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
-
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
-def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
-def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
-def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -195,15 +196,15 @@ def sdmem : Operand<v2f64> {
//===----------------------------------------------------------------------===//
// 128-bit load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// 256-bit load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
-def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// Like 'store', but always requires 128-bit vector alignment.
@@ -223,6 +224,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
// Like 'load', but always requires 256-bit vector alignment.
def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
@@ -234,22 +240,20 @@ def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
// 128-bit aligned load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
(v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
- (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// 256-bit aligned load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload256 node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
(v4f64 (alignedload256 node:$ptr))>;
-def alignedloadv8i32 : PatFrag<(ops node:$ptr),
- (v8i32 (alignedload256 node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
(v4i64 (alignedload256 node:$ptr))>;
@@ -268,19 +272,16 @@ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
// 128-bit memop pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
// 256-bit memop pattern fragments
-def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
-def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -326,6 +327,8 @@ def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// 256-bit bitconvert pattern fragments
+def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
+def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
@@ -350,30 +353,6 @@ def BYTE_imm : SDNodeXForm<imm, [{
return getI32Imm(N->getZExtValue() >> 3);
}]>;
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
// to VEXTRACTF128 imm.
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
@@ -386,72 +365,6 @@ def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
}]>;
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshufhw_imm>;
-
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshuflw_imm>;
-
def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
node:$index), [{
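
NOTE (editorial, not part of the patch): the X86InstrFragmentsSIMD.td hunks above fold families of per-element-type nodes (X86psignb/w/d, X86pcmpeqb/w/d/q, the per-type unpack and VPERMILP variants) into single type-generic nodes by loosening their SDTypeProfiles from a fixed SDTCisVT to SDTCisVec plus SDTCisSameAs constraints, and they drop the v4i32/v8i32 load fragments because, per the NOTE comments in the diff, integer vector loads are promoted to v2i64/v4i64. A self-contained mimic of the "one node, vector-typed constraints" idea (hypothetical classes, not the real SDNode/SDTypeProfile machinery):

class SketchConstraint<string desc> { string Desc = desc; }
def SkResultIsVec : SketchConstraint<"result is any vector type">;
def SkOp1Matches  : SketchConstraint<"operand 1 has the result's type">;
def SkOp2Matches  : SketchConstraint<"operand 2 has the result's type">;
class SketchNode<string opcode, list<SketchConstraint> profile> {
  string Opcode = opcode;
  list<SketchConstraint> Profile = profile;
}
// One generic node where there used to be one per element width.
def Sketch_psign : SketchNode<"X86ISD::PSIGN",
                              [SkResultIsVec, SkOp1Matches, SkOp2Matches]>;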
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 3a02de0aa01b..307c96b8c43f 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,14 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -83,6 +82,12 @@ enum {
TB_FOLDED_STORE = 1 << 19
};
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint32_t Flags;
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -92,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -260,22 +265,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
{ X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
{ X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
{ X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
{ X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
{ X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
@@ -352,6 +356,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -362,6 +367,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
{ X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
// AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -456,6 +462,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
+ { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
+ { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
{ X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
{ X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
{ X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
@@ -508,6 +517,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
{ X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
@@ -524,22 +538,39 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 256-bit foldable instructions
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
+ { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
+ { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -563,6 +594,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
{ X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
{ X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
{ X86::CMOVA16rr, X86::CMOVA16rm, 0 },
{ X86::CMOVA32rr, X86::CMOVA32rm, 0 },
{ X86::CMOVA64rr, X86::CMOVA64rm, 0 },
@@ -652,6 +687,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
{ X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
{ X86::MULSDrr, X86::MULSDrm, 0 },
@@ -664,30 +700,45 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
{ X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
{ X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
{ X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
{ X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
{ X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
{ X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
{ X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
{ X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
+ { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
{ X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
{ X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
{ X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
{ X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
{ X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
{ X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
{ X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
{ X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
{ X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
+ { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
+ { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
{ X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
{ X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
@@ -695,6 +746,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
{ X86::PORrr, X86::PORrm, TB_ALIGN_16 },
{ X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
+ { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
+ { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
{ X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
{ X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
{ X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
@@ -778,6 +833,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
{ X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
{ X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
{ X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
@@ -816,6 +875,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
{ X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
{ X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
{ X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
@@ -824,28 +884,47 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
{ X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
{ X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
{ X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
{ X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
{ X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
{ X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
{ X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
{ X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
{ X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
{ X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
{ X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
{ X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
{ X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
{ X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
{ X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
{ X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
{ X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
{ X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
{ X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
{ X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
@@ -853,6 +932,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
{ X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
{ X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
{ X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
{ X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
{ X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
@@ -886,14 +969,154 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
{ X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
+ { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
+ { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
+ { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
+ { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
+ { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
+ { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
+ { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
+ { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
+ { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
+ { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
@@ -946,7 +1169,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
- break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -989,7 +1211,8 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
static bool isFrameLoadOpcode(int Opcode) {
switch (Opcode) {
- default: break;
+ default:
+ return false;
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
@@ -1011,9 +1234,7 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
- break;
}
- return false;
}
static bool isFrameStoreOpcode(int Opcode) {
@@ -1203,6 +1424,8 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SeenDef = true;
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
@@ -1247,6 +1470,10 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ // A register mask may clobber EFLAGS, but we should still look for a
+ // live EFLAGS def.
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SawKill = true;
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
if (MO.isDef()) return MO.isDead();
if (MO.isKill()) SawKill = true;
@@ -1357,7 +1584,6 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
switch (MIOpc) {
default:
llvm_unreachable(0);
- break;
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -1392,9 +1618,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -1469,6 +1695,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(B, getKillRegState(isKill)).addImm(M);
break;
}
+ case X86::SHUFPDrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+
+ // Convert to PSHUFD mask.
+ M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
@@ -1597,7 +1841,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
Opc = X86::LEA64r;
RC = X86::GR64_NOSPRegisterClass;
@@ -1904,13 +2148,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -1936,7 +2179,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2420,7 +2663,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
@@ -2624,6 +2869,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
///
static bool hasPartialRegUpdate(unsigned Opcode) {
switch (Opcode) {
+ case X86::CVTSI2SSrr:
+ case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SDrr:
+ case X86::CVTSI2SD64rr:
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:
case X86::CVTSS2SDrr:
@@ -2631,7 +2880,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2643,7 +2894,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2652,6 +2905,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
+/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// instructions we would like before a partial register update.
+unsigned X86InstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // If MI is marked as reading Reg, the partial register update is wanted.
+ const MachineOperand &MO = MI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MO.readsReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else {
+ if (MI->readsRegister(Reg, TRI))
+ return 0;
+ }
+
+ // If any of the preceding 16 instructions are reading Reg, insert a
+ // dependency breaking instruction. The magic number is based on a few
+ // Nehalem experiments.
+ return 16;
+}
+
+void X86InstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (X86::VR128RegClass.contains(Reg)) {
+ // These instructions are all floating point domain, so xorps is the best
+ // choice.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // Use vxorps to clear the full ymm register.
+ // It wants to read and write the xmm sub-register.
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
+ .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ } else
+ return;
+ MI->addRegisterKilled(Reg, TRI, true);
+}
+
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
@@ -2714,6 +3015,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
Alignment = 32;
break;
case X86::V_SET0:
@@ -2722,11 +3025,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2759,10 +3060,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2788,16 +3089,19 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
+ Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -3329,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
@@ -3366,31 +3670,66 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
+ { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
+ { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
+ { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
+ { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
+ { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
+ { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
+ return 0;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- return std::make_pair(domain,
- domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ uint16_t validDomains = 0;
+ if (domain && lookup(MI->getOpcode(), domain))
+ validDomains = 0xe;
+ else if (domain && lookupAVX2(MI->getOpcode(), domain))
+ validDomains = hasAVX2 ? 0xe : 0x6;
+ return std::make_pair(domain, validDomains);
}
void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
+ table = lookupAVX2(MI->getOpcode(), dom);
+ }
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 97009dbdbe50..b23d7560ec16 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,10 +14,10 @@
#ifndef X86INSTRUCTIONINFO_H
#define X86INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"
@@ -345,6 +345,11 @@ public:
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d54bf275c04f..6a2531269d8f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -99,17 +99,16 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
-def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
- [SDNPHasChain]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
@@ -226,6 +225,10 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
+def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>;
+def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
+def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
@@ -237,6 +240,9 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
+ [SDNPHasChain, SDNPOutGlue]>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -247,10 +253,31 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
-def X86MemAsmOperand : AsmOperandClass {
- let Name = "Mem";
- let SuperClasses = [];
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem"; let PredicateMethod = "isMem";
+}
+def X86Mem8AsmOperand : AsmOperandClass {
+ let Name = "Mem8"; let PredicateMethod = "isMem8";
+}
+def X86Mem16AsmOperand : AsmOperandClass {
+ let Name = "Mem16"; let PredicateMethod = "isMem16";
+}
+def X86Mem32AsmOperand : AsmOperandClass {
+ let Name = "Mem32"; let PredicateMethod = "isMem32";
+}
+def X86Mem64AsmOperand : AsmOperandClass {
+ let Name = "Mem64"; let PredicateMethod = "isMem64";
+}
+def X86Mem80AsmOperand : AsmOperandClass {
+ let Name = "Mem80"; let PredicateMethod = "isMem80";
+}
+def X86Mem128AsmOperand : AsmOperandClass {
+ let Name = "Mem128"; let PredicateMethod = "isMem128";
}
+def X86Mem256AsmOperand : AsmOperandClass {
+ let Name = "Mem256"; let PredicateMethod = "isMem256";
+}
+
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
let SuperClasses = [X86MemAsmOperand];
@@ -267,17 +294,28 @@ def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem">;
-def i16mem : X86MemOperand<"printi16mem">;
-def i32mem : X86MemOperand<"printi32mem">;
-def i64mem : X86MemOperand<"printi64mem">;
-def i128mem : X86MemOperand<"printi128mem">;
-def i256mem : X86MemOperand<"printi256mem">;
-def f32mem : X86MemOperand<"printf32mem">;
-def f64mem : X86MemOperand<"printf64mem">;
-def f80mem : X86MemOperand<"printf80mem">;
-def f128mem : X86MemOperand<"printf128mem">;
-def f256mem : X86MemOperand<"printf256mem">;
+def i8mem : X86MemOperand<"printi8mem"> {
+ let ParserMatchClass = X86Mem8AsmOperand; }
+def i16mem : X86MemOperand<"printi16mem"> {
+ let ParserMatchClass = X86Mem16AsmOperand; }
+def i32mem : X86MemOperand<"printi32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def i64mem : X86MemOperand<"printi64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def i128mem : X86MemOperand<"printi128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def i256mem : X86MemOperand<"printi256mem"> {
+ let ParserMatchClass = X86Mem256AsmOperand; }
+def f32mem : X86MemOperand<"printf32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def f64mem : X86MemOperand<"printf64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def f80mem : X86MemOperand<"printf80mem"> {
+ let ParserMatchClass = X86Mem80AsmOperand; }
+def f128mem : X86MemOperand<"printf128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def f256mem : X86MemOperand<"printf256mem">{
+ let ParserMatchClass = X86Mem256AsmOperand; }
}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
@@ -285,7 +323,7 @@ def f256mem : X86MemOperand<"printf256mem">;
def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem8AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -299,7 +337,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<2>;
def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem32AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -310,7 +348,7 @@ def i64mem_TC : Operand<i64> {
let PrintMethod = "printi64mem";
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
ptr_rc_tailcall, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem64AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -336,6 +374,11 @@ def SSECC : Operand<i8> {
let OperandType = "OPERAND_IMMEDIATE";
}
+def AVXCC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
class ImmSExtAsmOperandClass : AsmOperandClass {
let SuperClasses = [ImmAsmOperand];
let RenderMethod = "addImmOperands";
@@ -466,37 +509,32 @@ def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
-
def HasAVX : Predicate<"Subtarget->hasAVX()">;
-def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
-def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
-def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
-def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"ModeNaCl">;
-def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">,
- AssemblerPredicate<"ModeNaCl,!Mode64Bit">;
-def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">,
- AssemblerPredicate<"ModeNaCl,Mode64Bit">;
-def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"!ModeNaCl">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
@@ -1375,7 +1413,7 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
}
//===----------------------------------------------------------------------===//
-// TZCNT Instruction
+// BMI Instructions
//
let Predicates = [HasBMI], Defs = [EFLAGS] in {
def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1405,6 +1443,83 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
(implicit EFLAGS)]>, XS;
}
+multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
+ RegisterClass RC, X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag> {
+ def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+ def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+ T8, VEX_4V;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
+ X86blsr_flag, loadi32>;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
+ X86blsr_flag, loadi64>, VEX_W;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
+ X86blsmsk_flag, loadi32>;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
+ X86blsmsk_flag, loadi64>, VEX_W;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
+ X86blsi_flag, loadi32>;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
+ X86blsi_flag, loadi64>, VEX_W;
+}
+
+multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8, VEX_4VOp3;
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8, VEX_4VOp3;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
+ int_x86_bmi_bextr_32, loadi32>;
+ defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
+ int_x86_bmi_bextr_64, loadi64>, VEX_W;
+}
+
+let Predicates = [HasBMI2], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
+ int_x86_bmi_bzhi_32, loadi32>;
+ defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
+ int_x86_bmi_bzhi_64, loadi64>, VEX_W;
+}
+
+multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>,
+ VEX_4V;
+ def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, VEX_4V;
+}
+
+let Predicates = [HasBMI2] in {
+ defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+ int_x86_bmi_pdep_32, loadi32>, T8XD;
+ defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+ int_x86_bmi_pdep_64, loadi64>, T8XD, VEX_W;
+ defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+ int_x86_bmi_pext_32, loadi32>, T8XS;
+ defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+ int_x86_bmi_pext_64, loadi64>, T8XS, VEX_W;
+}
+
//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
@@ -1424,12 +1539,16 @@ include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
+// XOP
+include "X86InstrXOP.td"
+
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
include "X86InstrVMX.td"
+include "X86InstrSVM.td"
// System instructions.
include "X86InstrSystem.td"
@@ -1445,10 +1564,11 @@ def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cwd", "cwtd">;
def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cdqe", "cltq">;
+def : MnemonicAlias<"cqo", "cqto">;
// lret maps to lretl, it is not ambiguous with lretq.
def : MnemonicAlias<"lret", "lretl">;
@@ -1497,6 +1617,7 @@ def : MnemonicAlias<"verrw", "verr">;
// System instruction aliases.
def : MnemonicAlias<"iret", "iretl">;
def : MnemonicAlias<"sysret", "sysretl">;
+def : MnemonicAlias<"sysexit", "sysexitl">;
def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
@@ -1516,6 +1637,8 @@ def : MnemonicAlias<"fcmovna", "fcmovbe">;
def : MnemonicAlias<"fcmovae", "fcmovnb">;
def : MnemonicAlias<"fcomip", "fcompi">;
def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fistpq", "fistpll">;
+def : MnemonicAlias<"fisttpq", "fisttpll">;
def : MnemonicAlias<"fldcww", "fldcw">;
def : MnemonicAlias<"fnstcww", "fnstcw">;
def : MnemonicAlias<"fnstsww", "fnstsw">;
@@ -1737,20 +1860,20 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
// errors, since its encoding is the most compact.
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
-// shld/shrd op,op -> shld op, op, 1
-def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
-def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
-def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
-def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
-def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
-def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
-
-def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
-def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
-def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
-def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
-def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
-def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+// shld/shrd op,op -> shld op, op, CL
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
/* FIXME: This is disabled because the asm matcher is currently incapable of
* matching a fixed immediate like $1.
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index b2d9fca97b02..63f96b6f5d3b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -105,19 +105,23 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ [(set DstRC:$dst, (Int SrcRC:$src))],
+ IIC_DEFAULT, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))],
+ IIC_DEFAULT, d>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ IIC_DEFAULT, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ IIC_DEFAULT, d>;
}
//===----------------------------------------------------------------------===//
@@ -175,25 +179,25 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "movdq2q\t{$src, $dst|$dst, $src}",
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "movq2dq\t{$src, $dst|$dst, $src}",
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
- "movq2dq\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
- "movdq2q\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d3ced23450fe..408ab16778d1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,4 +1,4 @@
-//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,126 @@
//
//===----------------------------------------------------------------------===//
+class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+}
+
+class SizeItins<OpndItins arg_s, OpndItins arg_d> {
+ OpndItins s = arg_s;
+ OpndItins d = arg_d;
+}
+
+
+class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
+ InstrItinClass arg_ri> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+ InstrItinClass ri = arg_ri;
+}
+
+
+// scalar
+def SSE_ALU_F32S : OpndItins<
+ IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
+>;
+
+def SSE_ALU_F64S : OpndItins<
+ IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
+>;
+
+def SSE_ALU_ITINS_S : SizeItins<
+ SSE_ALU_F32S, SSE_ALU_F64S
+>;
+
+def SSE_MUL_F32S : OpndItins<
+ IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_F64S : OpndItins<
+ IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_ITINS_S : SizeItins<
+ SSE_MUL_F32S, SSE_MUL_F64S
+>;
+
+def SSE_DIV_F32S : OpndItins<
+ IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_F64S : OpndItins<
+ IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_ITINS_S : SizeItins<
+ SSE_DIV_F32S, SSE_DIV_F64S
+>;
+
+// parallel
+def SSE_ALU_F32P : OpndItins<
+ IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
+>;
+
+def SSE_ALU_F64P : OpndItins<
+ IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
+>;
+
+def SSE_ALU_ITINS_P : SizeItins<
+ SSE_ALU_F32P, SSE_ALU_F64P
+>;
+
+def SSE_MUL_F32P : OpndItins<
+ IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_F64P : OpndItins<
+ IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_ITINS_P : SizeItins<
+ SSE_MUL_F32P, SSE_MUL_F64P
+>;
+
+def SSE_DIV_F32P : OpndItins<
+ IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_F64P : OpndItins<
+ IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_ITINS_P : SizeItins<
+ SSE_DIV_F32P, SSE_DIV_F64P
+>;
+
+def SSE_BIT_ITINS_P : OpndItins<
+ IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
+>;
+
+def SSE_INTALU_ITINS_P : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
+def SSE_INTALUQ_ITINS_P : OpndItins<
+ IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
+>;
+
+def SSE_INTMUL_ITINS_P : OpndItins<
+ IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
+>;
+
+def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
+ IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
+>;
+
+def SSE_MOVA_ITINS : OpndItins<
+ IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
+>;
+
+def SSE_MOVU_ITINS : OpndItins<
+ IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
+>;
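+// A minimal sketch of how these wrappers are meant to be consumed (hypothetical
+// multiclass and defm names; the real users are the sse12_* multiclasses below):
+// an OpndItins is passed in once and the per-form itinerary is read from it,
+// while SizeItins lets a caller pick the float (.s) or double (.d) bundle.
+//
+//   multiclass example_fp_scalar<bits<8> opc, string asm, SDNode OpNode,
+//                                OpndItins itins> {
+//     def rr : SI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+//                 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+//                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))], itins.rr>;
+//     def rm : SI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+//                 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+//                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))], itins.rm>;
+//   }
+//   defm example_ADD : example_fp_scalar<0x58, "addss", fadd, SSE_ALU_F32S>;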
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
@@ -21,25 +141,27 @@
/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
+ OpndItins itins,
bit Is2Addr = 1> {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
Operand memopr, ComplexPattern mem_cpat,
+ OpndItins itins,
bit Is2Addr = 1> {
def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -47,72 +169,74 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))]>;
+ RC:$src1, RC:$src2))], itins.rr>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))]>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType vt,
X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
+ itins.rm, d>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1> {
- let isCommutable = 1 in
+ bit Is2Addr = 1,
+ bit rr_hasSideEffects = 0> {
+ let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, d>;
+ pat_rr, IIC_DEFAULT, d>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, d>;
+ pat_rm, IIC_DEFAULT, d>;
}
/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], d>;
+ RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], d>;
+ RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
}
//===----------------------------------------------------------------------===//
@@ -170,7 +294,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
-let Predicates = [HasXMMInt] in {
+let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
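 // Illustrative only (annotation, not patch content): because of patterns like
 // the ones above, IR such as
 //   %b = bitcast <4 x i32> %a to <2 x i64>
 // selects to no machine instruction at all; the result simply reuses the XMM
 // register that already holds %a.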
@@ -239,21 +363,13 @@ let Predicates = [HasAVX] in {
}
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: Set encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
- def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
- def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
- def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1 in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}
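 // Sketch of the intent (not stated explicitly in the patch): ExpandPostRAPseudos
 // rewrites these pseudos into a self-xor of the allocated destination register
 // (xorps/pxor), e.g. FsFLD0SS on %xmm0 becomes "xorps %xmm0, %xmm0",
 // materializing +0.0 without touching memory.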
//===----------------------------------------------------------------------===//
@@ -286,16 +402,35 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, Predicates = [HasAVX] in {
+ isCodeGenOnly = 1 in {
+let Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}
+let Predicates = [HasAVX2], neverHasSideEffects = 1 in
+def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
+ []>, VEX_4V;
+}
+let Predicates = [HasAVX2], AddedComplexity = 5 in {
+ def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+}
// AVX has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
+def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+
def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
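+// Illustrative only: each SUBREG_TO_REG pattern above ends up emitting a single
+// 128-bit zeroing idiom such as "vxorps %xmm0, %xmm0, %xmm0"; a VEX-encoded
+// 128-bit operation implicitly clears bits 255:128 of the corresponding %ymm
+// register, so the whole 256-bit value is zero without needing any AVX2
+// integer instruction.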
@@ -310,13 +445,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
+ let Predicates = [HasAVX] in
def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ let Predicates = [HasAVX2] in
+ def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+}
//===----------------------------------------------------------------------===//
@@ -329,22 +467,25 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>;
// Loading from memory automatically zeroing upper bits.
class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_pat, string OpcodeStr> :
SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))]>;
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>;
// AVX
-def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
"movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
"movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
VEX_LIG;
@@ -352,11 +493,13 @@ def VMOVSDrr : sse12_move_rr<FR64, v2f64,
let isCodeGenOnly = 1 in {
def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
XS, VEX_4V, VEX_LIG;
def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
XD, VEX_4V, VEX_LIG;
}
@@ -370,26 +513,30 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+ [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ XS, VEX, VEX_LIG;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+ [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ XD, VEX, VEX_LIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
"movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
"movsd\t{$src2, $dst|$dst, $src2}">, XD;
// For the disassembler
let isCodeGenOnly = 1 in {
def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+ "movss\t{$src2, $dst|$dst, $src2}", [],
+ IIC_SSE_MOV_S_RR>, XS;
def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+ "movsd\t{$src2, $dst|$dst, $src2}", [],
+ IIC_SSE_MOV_S_RR>, XD;
}
}
@@ -402,153 +549,14 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>;
+ [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>;
+ [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
// Patterns
-let Predicates = [HasSSE1] in {
- let AddedComplexity = 15 in {
- // Extract the low 32-bit value from one vector and insert it into another.
- def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
- def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSS to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
- }
-
- let AddedComplexity = 20 in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- }
-
- // Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
- // Shuffle with MOVSS
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (MOVSSrr VR128:$src1, FR32:$src2)>;
- def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
- def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-}
-
-let Predicates = [HasSSE2] in {
- let AddedComplexity = 15 in {
- // Extract the low 64-bit value from one vector and insert it into another.
- def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
- // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
- def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
- def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
-
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSD to the lower bits.
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
- }
-
- let AddedComplexity = 20 in {
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- }
-
- // Extract and store.
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
- // Shuffle with MOVSD
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (MOVSDrr VR128:$src1, FR64:$src2)>;
- def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
- def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
-
- // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
- def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
-}
-
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
- // Extract the low 32-bit value from one vector and insert it into another.
- def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (VMOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
- def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (VMOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-
- // Extract the low 64-bit value from one vector and insert it into another.
- def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
- // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
- def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
- def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
-
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
@@ -561,6 +569,16 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
}
let AddedComplexity = 20 in {
@@ -588,6 +606,9 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
@@ -605,6 +626,20 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -617,8 +652,6 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Shuffle with VMOVSS
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (VMOVSSrr VR128:$src1, FR32:$src2)>;
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4i32 VR128:$src1),
(EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
@@ -626,9 +659,17 @@ let Predicates = [HasAVX] in {
(VMOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
// Shuffle with VMOVSD
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (VMOVSDrr VR128:$src1, FR64:$src2)>;
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(VMOVSDrr (v2i64 VR128:$src1),
(EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
@@ -642,10 +683,27 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
sub_sd))>;
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
+  // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
+  // problem is during lowering, where it's not possible to recognize the fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
@@ -654,6 +712,101 @@ let Predicates = [HasAVX] in {
sub_sd))>;
}
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
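+// Rough sketch of what the SUBREG_TO_REG forms above buy us: IR along the
+// lines of
+//   %f = load float, float* %p
+//   %v = insertelement <4 x float> zeroinitializer, float %f, i32 0
+// can be selected to a single MOVSSrm, since the memory form of movss already
+// leaves the upper three lanes zeroed; SUBREG_TO_REG merely records that fact
+// instead of emitting an extra zeroing instruction.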
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+  // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
+  // problem is during lowering, where it's not possible to recognize the fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
@@ -661,126 +814,176 @@ let Predicates = [HasAVX] in {
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
+ OpndItins itins,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, TB, VEX;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, TB, VEX;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
- "movaps", SSEPackedSingle>, TB, VEX;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
- "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
- "movups", SSEPackedSingle>, TB, VEX;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, TB;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, TB, OpSize;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, TB;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize;
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(store (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(store (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
// For disassembler
let isCodeGenOnly = 1 in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}
-def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
-
-def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v4f32 VR128:$src), addr:$dst)]>;
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>;
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
// For disassembler
let isCodeGenOnly = 1 in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
}
let Predicates = [HasAVX] in {
@@ -797,44 +1000,9 @@ let Predicates = [HasSSE2] in
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
(MOVUPDmr addr:$dst, VR128:$src)>;
-// Use movaps / movups for SSE integer load / store (one byte shorter).
-// The instructions selected below are then converted to MOVDQA/MOVDQU
-// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
- def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-}
-
// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
// 128-bit load/store
- def : Pat<(alignedloadv4i32 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (VMOVUPSrm addr:$src)>;
def : Pat<(alignedloadv2i64 addr:$src),
(VMOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
@@ -862,10 +1030,6 @@ let Predicates = [HasAVX] in {
(VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedloadv8i32 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv8i32 addr:$src),
- (VMOVUPSYrm addr:$src)>;
def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
@@ -884,36 +1048,71 @@ let Predicates = [HasAVX] in {
(VMOVUPSYmr addr:$dst, VR256:$src)>;
}
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
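+// Hedged note on the "one byte shorter" claim above: movaps lacks the 0x66
+// operand-size prefix (0F 28 /r versus 66 0F 6F /r for movdqa), which is where
+// the byte is saved.  The SSE execution-domain pass may later rewrite these
+// MOVAPS/MOVUPS selections back to MOVDQA/MOVDQU when it places the code in
+// the integer domain, as the comment above notes.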
+
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let neverHasSideEffects = 1 in {
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
}
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
}
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
}
//===----------------------------------------------------------------------===//
@@ -921,94 +1120,68 @@ let isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- PatFrag mov_frag, string base_opc,
- string asm_opr> {
+ SDNode psnode, SDNode pdnode, string base_opc,
+ string asm_opr, InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[(set RC:$dst,
- (mov_frag RC:$src1,
+ (psnode RC:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB;
def PDrm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ [(set RC:$dst, (v2f64 (pdnode RC:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize;
}
let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ IIC_SSE_MOV_LH>, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
- "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
+ "\t{$src2, $dst|$dst, $src2}",
+ IIC_SSE_MOV_LH>;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
let Predicates = [HasAVX] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
- def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
- // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
- def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
- }
-
- // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPSmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
- VR128:$src2)), addr:$src1),
- (VMOVLPSmr addr:$src1, VR128:$src2)>;
-
- // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
- def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPDmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPDmr addr:$src1, VR128:$src2)>;
-
// Shuffle with VMOVLPS
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
// Shuffle with VMOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
@@ -1026,19 +1199,9 @@ let Predicates = [HasAVX] in {
}
let Predicates = [HasSSE1] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
- def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
- }
-
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
- VR128:$src2)), addr:$src1),
+ def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ (iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
// Shuffle with MOVLPS
@@ -1047,7 +1210,7 @@ let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
// Store patterns
@@ -1061,28 +1224,11 @@ let Predicates = [HasSSE1] in {
}
let Predicates = [HasSSE2] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
- def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
- }
-
- // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
- def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
@@ -1098,12 +1244,14 @@ let Predicates = [HasSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ IIC_SSE_MOV_LH>, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
+ "\t{$src2, $dst|$dst, $src2}",
+ IIC_SSE_MOV_LH>;
}
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -1111,94 +1259,62 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>,
- VEX;
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>,
- VEX;
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>;
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
let Predicates = [HasAVX] in {
// VMOVHPS patterns
- def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the
+  // problem is during lowering, where it's not possible to recognize the load
+  // fold because it has two uses through a bitcast. One use disappears at isel
+  // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
-
- // FIXME: This should be matched by a X86Movhpd instead. Same as above
- def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (VMOVHPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (VMOVHPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (VMOVHPDmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE1] in {
// MOVHPS patterns
- def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the
+  // problem is during lowering, where it's not possible to recognize the load
+  // fold because it has two uses through a bitcast. One use disappears at isel
+  // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
-
- // FIXME: This should be matched by a X86Movhpd instead. Same as above
- def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPDmr addr:$dst, VR128:$src)>;
}
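// With the change above, the high-half store is carried directly on the
// MOVHPSmr/MOVHPDmr (and V-prefixed) instruction patterns: unpacking a
// register with itself moves element 1 into lane 0, so extracting (iPTR 0) of
// (X86Unpckh VR128:$src, VR128:$src) yields the high f64, and the separate
// per-predicate "Store patterns" blocks become redundant.  A pattern of the
// same shape, shown here purely as an illustration, would be:
//
//   def : Pat<(store (f64 (vector_extract
//                           (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
//                           (iPTR 0))), addr:$dst),
//             (MOVHPDmr addr:$dst, VR128:$src)>;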
//===----------------------------------------------------------------------===//
@@ -1210,13 +1326,15 @@ let AddedComplexity = 20 in {
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
VEX_4V;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
@@ -1224,86 +1342,36 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>;
}
let Predicates = [HasAVX] in {
// MOVLHPS patterns
- let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-
- // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
- def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
- }
- def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
// MOVHLPS patterns
- let AddedComplexity = 20 in {
- // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
- def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
-
- // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
- def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
- def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
- }
-
- def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
(VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE1] in {
// MOVLHPS patterns
- let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-
- // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
- def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
- }
- def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
// MOVHLPS patterns
- let AddedComplexity = 20 in {
- // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
- def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
- // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
- def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
- def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
- }
-
- def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
(MOVHLPSrr VR128:$src1, VR128:$src2)>;
}
@@ -1312,70 +1380,97 @@ let Predicates = [HasSSE1] in {
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
+def SSE_CVT_PD : OpndItins<
+ IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
+>;
+
+def SSE_CVT_PS : OpndItins<
+ IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
+>;
+
+def SSE_CVT_Scalar : OpndItins<
+ IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
+>;
+
+def SSE_CVT_SS2SI_32 : OpndItins<
+ IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
+>;
+
+def SSE_CVT_SS2SI_64 : OpndItins<
+ IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
+>;
+
+def SSE_CVT_SD2SI : OpndItins<
+ IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
+>;
+
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ [(set DstRC:$dst, (OpNode SrcRC:$src))],
+ itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
-}
-
-multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+ itins.rm>;
}
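// Each OpndItins record above just pairs a register-form and a memory-form
// itinerary class, so one extra multiclass parameter carries the scheduling
// data for both encodings.  A minimal sketch of the consuming side, mirroring
// sse12_cvt_s above (the name example_cvt is illustrative only):
//
//   multiclass example_cvt<bits<8> opc, SDNode OpNode, string asm,
//                          OpndItins itins> {
//     def rr : SI<opc, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), asm,
//                 [(set GR32:$dst, (OpNode FR32:$src))], itins.rr>;
//     def rm : SI<opc, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), asm,
//                 [(set GR32:$dst, (OpNode (loadf32 addr:$src)))], itins.rm>;
//   }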
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
+ string asm, Domain d, OpndItins itins> {
def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ [(set DstRC:$dst, (OpNode SrcRC:$src))],
+ itins.rr, d>;
def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+ itins.rm, d>;
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+} // neverHasSideEffects = 1
}
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_LIG;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>,
+ XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W, VEX_LIG;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
- VEX_LIG;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX, VEX_W, VEX_LIG;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W, VEX_LIG;
// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only using memory operands, so
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
- VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
- VEX_4V, VEX_W, VEX_LIG;
-
-let Predicates = [HasAVX] in {
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+ XS, VEX_4V, VEX_LIG;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
+ XS, VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+ XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
+ XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
+ XD, VEX_4V, VEX_W, VEX_LIG;
+
+let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1396,169 +1491,185 @@ let Predicates = [HasAVX] in {
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+ PatFrag ld_frag, string asm, OpndItins itins,
+ bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ itins.rm>;
}
-defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD, VEX;
-defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
- XD, VEX, VEX_W;
-
-// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
-// Get rid of this hack or rename the intrinsics, there are several
-// intructions that only match with the intrinsic form, why create duplicates
-// to let them be recognized by the assembler?
-defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
-defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
- VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si{l}">, XD;
+ f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si{q}">, XD, REX_W;
+ f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
+ SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss">, XS;
+ "cvtsi2ss", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
- "cvtsi2ss{q}">, XS, REX_W;
+ "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd">, XD;
+ "cvtsi2sd", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
- "cvtsi2sd">, XD, REX_W;
+ "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
}
/// SSE 1 Only
// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si">, XS, VEX;
+ f32mem, load, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si">, XS, VEX, VEX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si">, XD, VEX;
+ f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
+ XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si">, XD, VEX, VEX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si">, XS;
+ f32mem, load, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si{q}">, XS, REX_W;
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si">, XD;
+ f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
+ XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si{q}">, XD, REX_W;
+ "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ XD, REX_W;
let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
- VEX, VEX_LIG;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
- "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W, VEX_LIG;
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
- "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
+ SSEPackedSingle, SSE_CVT_PS>,
+                      TB; /* PD SSE3 form is available */
}
-let Predicates = [HasSSE1] in {
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (CVTSS2SIrm addr:$src)>;
+ (VCVTSS2SIrm addr:$src)>;
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (CVTSS2SI64rm addr:$src)>;
+ (VCVTSS2SI64rm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE1] in {
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (VCVTSS2SIrm addr:$src)>;
+ (CVTSS2SIrm addr:$src)>;
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (VCVTSS2SI64rm addr:$src)>;
+ (CVTSS2SI64rm addr:$src)>;
}
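// The Is2Addr bit threaded through sse12_cvt_sint_3addr above selects between
// the tied two-operand SSE syntax and the three-operand AVX syntax for the
// same opcode, which is why the SSE defms sit under
// "let Constraints = "$src1 = $dst"" while the VEX ones pass 0.  Sketch of
// that asm-string selection only (illustrative names, not a real definition):
//
//   multiclass example_3addr<string asm, bit Is2Addr = 1> {
//     def rr : SI<0x2A, MRMSrcReg, (outs VR128:$dst),
//                 (ins VR128:$src1, VR128:$src2),
//                 !if(Is2Addr,
//                     !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
//                     !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
//                 []>;
//   }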
/// SSE 2 Only
@@ -1566,43 +1677,51 @@ let Predicates = [HasAVX] in {
// Convert scalar double to scalar single
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG;
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround FR64:$src))]>;
+ [(set FR32:$dst, (fround FR64:$src))],
+ IIC_SSE_CVT_Scalar_RR>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))],
+ IIC_SSE_CVT_Scalar_RM>,
+ XD,
Requires<[HasSSE2, OptForSize]>;
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
+ SSE_CVT_Scalar, 0>,
XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
+ SSE_CVT_Scalar>, XS;
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ [], IIC_SSE_CVT_Scalar_RR>,
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
let Predicates = [HasAVX] in {
def : Pat<(f64 (fextend FR32:$src)),
@@ -1619,11 +1738,13 @@ def : Pat<(extloadf32 addr:$src),
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ [(set FR64:$dst, (fextend FR32:$src))],
+ IIC_SSE_CVT_Scalar_RR>, XS,
Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ [(set FR64:$dst, (extloadf32 addr:$src))],
+ IIC_SSE_CVT_Scalar_RM>, XS,
Requires<[HasSSE2, OptForSize]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
@@ -1640,26 +1761,30 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS, VEX_4V,
+ VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS, VEX_4V,
+ (load addr:$src2)))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS,
+ VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS,
Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS,
+ (load addr:$src2)))],
+ IIC_SSE_CVT_Scalar_RM>, XS,
Requires<[HasSSE2]>;
}
@@ -1667,216 +1792,275 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
TB, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PS_RM>,
TB, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
TB, Requires<[HasSSE2]>;
def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PS_RM>,
TB, Requires<[HasSSE2]>;
// FIXME: why is the non-intrinsic version described as SSE3?
// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XS, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>,
XS, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XS, Requires<[HasSSE2]>;
def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>,
XS, Requires<[HasSSE2]>;
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>;
def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
VEX;
def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))]>, VEX;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XD, VEX, Requires<[HasAVX]>;
def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
XD, VEX, Requires<[HasAVX]>;
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XD, Requires<[HasSSE2]>;
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
XD, Requires<[HasSSE2]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-let mayLoad = 1 in
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-let mayLoad = 1 in
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>;
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-
-def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))]>,
- XS, VEX, Requires<[HasAVX]>;
-
-let Predicates = [HasSSE2] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>;
-}
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (Int_VCVTDQ2PSrm addr:$src)>;
+
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (VCVTTPS2DQrm addr:$src)>;
+
def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
(VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
(VCVTTPS2DQYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
+ (VCVTTPS2DQYrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (Int_CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (CVTTPS2DQrm addr:$src)>;
}
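// Because the VCVTTPS2DQ / VCVTTPS2DQY defs above now carry the truncating
// conversion intrinsics (and IIC_SSE_CVT_PS_* itineraries) directly, the old
// Int_VCVTTPS2DQ* aliases and the standalone int_x86_avx_cvtt_ps2dq_256 Pats
// further down become redundant, and the generic fp_to_sint patterns can fold
// their loads.  The added patterns follow the usual shape, e.g. (repeated here
// only as an illustration):
//
//   def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
//             (VCVTTPS2DQrm addr:$src)>;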
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+ (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX;
let isCodeGenOnly = 1 in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>, VEX;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB;
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
TB, VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))]>,
+ (load addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
TB, VEX, Requires<[HasAVX]>;
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
TB, Requires<[HasSSE2]>;
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))]>,
+ (load addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
TB, Requires<[HasSSE2]>;
// Convert packed double to packed single
@@ -1884,49 +2068,61 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
@@ -1949,11 +2145,6 @@ def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
(VCVTTPD2DQYrm addr:$src)>;
-def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
- (VCVTTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTTPS2DQYrm addr:$src)>;
-
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
@@ -1971,70 +2162,85 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT, PatFrag ld_frag,
- string asm, string asm_alt> {
+ Operand CC, SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm, string asm_alt,
+ OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))]>;
+ (ld_frag addr:$src2), imm:$cc))],
+ itins.rm>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RR>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RM>;
}
}
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>,
XS, VEX_4V, VEX_LIG;
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
XS;
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32 bit compare
XD;
}
-multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
- Intrinsic Int, string asm> {
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+ Intrinsic Int, string asm, OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, imm:$cc))]>;
+ VR128:$src, imm:$cc))],
+ itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), imm:$cc))]>;
+ (load addr:$src), imm:$cc))],
+ itins.rm>;
}
// Aliases to match intrinsics which expect XMM operand(s).
-defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>,
XS, VEX_4V;
-defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
- defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+ defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, // same latency as f32
+ XD;
}
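// Passing the condition-code operand class (SSECC for the legacy forms, AVXCC
// for the VEX forms) into these compare multiclasses lets one set of rr/rm
// defs serve both encodings; presumably AVXCC admits the wider AVX
// condition-code range while SSECC keeps the original one.  Minimal sketch of
// the parameterization (example_cmp, EXCMPSS, and EXVCMPSS are hypothetical
// names):
//
//   multiclass example_cmp<RegisterClass RC, Operand CC, string asm> {
//     def rr : SIi8<0xC2, MRMSrcReg,
//                   (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, []>;
//   }
//   defm EXCMPSS  : example_cmp<FR32, SSECC,
//                               "cmp${cc}ss\t{$src2, $dst|$dst, $src2}">;
//   defm EXVCMPSS : example_cmp<FR32, AVXCC,
//                     "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">;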
@@ -2044,11 +2250,13 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
PatFrag ld_frag, string OpcodeStr, Domain d> {
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
+ IIC_SSE_COMIS_RR, d>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
- (ld_frag addr:$src2)))], d>;
+ (ld_frag addr:$src2)))],
+ IIC_SSE_COMIS_RM, d>;
}
let Defs = [EFLAGS] in {
@@ -2098,89 +2306,91 @@ let Defs = [EFLAGS] in {
"comisd", SSEPackedDouble>, TB, OpSize;
} // Defs = [EFLAGS]
-// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Intrinsic Int, string asm, string asm_alt,
- Domain d> {
- let isAsmParserOnly = 1 in {
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
- }
+ Operand CC, Intrinsic Int, string asm,
+ string asm_alt, Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ IIC_SSE_CMPP_RR, d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ IIC_SSE_CMPP_RM, d>;
// Accept explicit immediate argument form instead of comparison code.
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], d>;
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
- asm_alt, [], d>;
+ let neverHasSideEffects = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ }
}
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle>, TB;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble>, TB, OpSize;
}
-let Predicates = [HasSSE1] in {
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [HasSSE2] in {
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-}
-
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
+let Predicates = [HasSSE1] in {
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
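// The former X86cmpps/X86cmppd selection nodes are folded into the single
// X86cmpp node used in the Pats above; the result and operand vector types
// alone pick the PS or PD (128- or 256-bit) instruction, so no per-type node
// is needed.  Same-shape illustration for the 256-bit single case:
//
//   def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
//             (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;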
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
@@ -2190,14 +2400,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d, bit IsConvertibleToThreeAddress = 0> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
- [(set RC:$dst, (vt (shufp:$src3
- RC:$src1, (mem_frag addr:$src2))))], d>;
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
- [(set RC:$dst,
- (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2220,133 +2430,52 @@ let Constraints = "$src1 = $dst" in {
TB;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, TB, OpSize;
-}
-
-let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
- // fall back to this for SSE1)
- def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPSrri case.
- def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-}
-
-let Predicates = [HasSSE2] in {
- // Special binary v4i32 shuffle cases with SHUFPS.
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPDrri cases.
- def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v2i64 shuffle cases using SHUFPDrri.
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Generic SHUFPD patterns
- def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
+ TB, OpSize;
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
- // fall back to this for SSE1)
- def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (VSHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPSrri case.
- def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v4i32 shuffle cases with SHUFPS.
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (VSHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VSHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPDrri cases.
- def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v2i64 shuffle cases using SHUFPDrri.
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (VSHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-
- def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
+
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
// 256-bit patterns
- def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v8i32 (X86Shufps VR256:$src1,
+ def : Pat<(v8i32 (X86Shufp VR256:$src1,
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v8f32 (X86Shufps VR256:$src1,
- (memopv8f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
-
- def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v4i64 (X86Shufpd VR256:$src1,
+ def : Pat<(v4i64 (X86Shufp VR256:$src1,
(memopv4i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
- def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v4f64 (X86Shufpd VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasSSE2] in {
+ // Generic SHUFPD patterns
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}
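The i8 immediate in the X86Shufp patterns selects two source lanes from each operand: result lanes 0-1 are taken from the first operand and lanes 2-3 from the second, two immediate bits per lane. A small illustration with the corresponding SSE intrinsic (helper name is only for the example):

    #include <xmmintrin.h>

    /* SHUFPS: swap the two low lanes of a and the two high lanes of b. */
    static __m128 swap_pairs(__m128 a, __m128 b) {
      /* result = { a[1], a[0], b[3], b[2] }, immediate 0xB1 */
      return _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 3, 0, 1));
    }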
//===----------------------------------------------------------------------===//
@@ -2354,159 +2483,80 @@ let Predicates = [HasAVX] in {
//===----------------------------------------------------------------------===//
/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
-multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
Domain d> {
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
- (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ IIC_SSE_UNPCK, d>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
- (mem_frag addr:$src2))))], d>;
-}
-
-let AddedComplexity = 10 in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
-
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
-
- let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, TB, OpSize;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, TB, OpSize;
- } // Constraints = "$src1 = $dst"
-} // AddedComplexity
+ (mem_frag addr:$src2))))],
+ IIC_SSE_UNPCK, d>;
+}
+
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
-let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
-}
+let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+} // Constraints = "$src1 = $dst"
-let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+let Predicates = [HasAVX], AddedComplexity = 1 in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-
- let AddedComplexity = 10 in
- def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}
-let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
-
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+let Predicates = [HasSSE2] in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
- let AddedComplexity = 10 in
- def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -2518,29 +2568,12 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVMSK, d>, REX_W;
}
-defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
- SSEPackedSingle>, TB;
-defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
- SSEPackedDouble>, TB, OpSize;
-
-def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
-def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
-def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
-def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
-
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
"movmskps", SSEPackedSingle>, TB, VEX;
@@ -2568,17 +2601,105 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
OpSize, VEX;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
OpSize, VEX;
}
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+
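MOVMSKPS/MOVMSKPD copy the sign bit of each floating-point lane into the low bits of a general-purpose register; the X86fgetsign patterns above reuse them for scalars by inserting the scalar into lane 0 of an otherwise undefined vector. Sketch with the usual intrinsic (the helper name is illustrative):

    #include <xmmintrin.h>

    /* Bit i of the result is the sign bit of lane i. */
    static int sign_mask(__m128 v) {
      return _mm_movemask_ps(v);   /* movmskps */
    }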
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0,
+ bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>;
+}
+} // ExeDomain = SSEPackedInt
+
+// These are ordered here for pattern ordering requirements with the fp versions
+
+let Predicates = [HasAVX] in {
+defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 0>;
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX2] in {
+defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+}
+
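Note on PANDN/VPANDN: the X86andnp node computes (~src1) & src2, which is why these defs pass IsCommutable = 0 while PAND/POR/PXOR pass 1. Equivalent intrinsic usage, for reference (helper name is illustrative):

    #include <emmintrin.h>

    /* pandn: clear in 'value' every bit that is set in 'mask'. */
    static __m128i clear_bits(__m128i mask, __m128i value) {
      return _mm_andnot_si128(mask, value);   /* (~mask) & value */
    }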
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
@@ -2586,31 +2707,39 @@ let Predicates = [HasAVX] in {
/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
+ TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
+ TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+ f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
+ TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
+ TB, OpSize;
}
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
let mayLoad = 0 in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
}
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
+ SSE_BIT_ITINS_P>;
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
@@ -2623,7 +2752,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2697,118 +2826,145 @@ let isCommutable = 0 in
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, Is2Addr>, XS;
+ OpNode, FR32, f32mem,
+ itins.s, Is2Addr>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, Is2Addr>, XD;
+ OpNode, FR64, f64mem,
+ itins.d, Is2Addr>, XD;
}
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins,
bit Is2Addr = 1> {
let mayLoad = 0 in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
+ TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
+ TB, OpSize;
}
}
multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode,
+ SizeItins itins> {
let mayLoad = 0 in {
defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
+ TB;
defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
+ TB, OpSize;
}
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d, Is2Addr>, XD;
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins,
bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, Is2Addr>, TB;
+ SSEPackedSingle, itins.s, Is2Addr>,
+ TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, Is2Addr>, TB, OpSize;
+ SSEPackedDouble, itins.d, Is2Addr>,
+ TB, OpSize;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins> {
defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, 0>, TB;
+ SSEPackedSingle, itins.s, 0>, TB;
defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, 0>, TB, OpSize;
+ SSEPackedDouble, itins.d, 0>, TB, OpSize;
}
// Binary Arithmetic instructions
-defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
-defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ VEX_4V;
let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min">,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
+ VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
- basic_sse12_fp_binop_p<0x58, "add", fadd>,
- basic_sse12_fp_binop_s_int<0x58, "add">;
- defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul>,
- basic_sse12_fp_binop_s_int<0x59, "mul">;
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
- basic_sse12_fp_binop_s_int<0x5C, "sub">;
- defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
- basic_sse12_fp_binop_s_int<0x5E, "div">;
- defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_s_int<0x5F, "max">,
- basic_sse12_fp_binop_p_int<0x5F, "max">;
- defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
- basic_sse12_fp_binop_s_int<0x5D, "min">,
- basic_sse12_fp_binop_p_int<0x5D, "min">;
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
}
}
@@ -2820,9 +2976,25 @@ let Constraints = "$src1 = $dst" in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+def SSE_SQRTP : OpndItins<
+ IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
+>;
+
+def SSE_SQRTS : OpndItins<
+ IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
+>;
+
+def SSE_RCPP : OpndItins<
+ IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
+>;
+
+def SSE_RCPS : OpndItins<
+ IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
+>;
+
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int> {
+ SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
@@ -2832,14 +3004,14 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
// partial register update condition.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
Requires<[HasSSE1, OptForSize]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
}
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
@@ -2852,80 +3024,91 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins ssmem:$src1, VR128:$src2),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
-multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
}
/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>;
def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int> {
+ Intrinsic V4F32Int, OpndItins itins> {
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>;
}
/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int> {
+ Intrinsic V4F32Int, OpndItins itins> {
def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ [(set VR256:$dst, (V4F32Int VR256:$src))],
+ itins.rr>;
def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int> {
+ SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))]>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
Requires<[HasSSE2, OptForSize]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ let mayLoad = 1 in
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
@@ -2934,45 +3117,52 @@ multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
}
/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>;
def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int> {
+ Intrinsic V2F64Int, OpndItins itins> {
def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+ [(set VR128:$dst, (V2F64Int VR128:$src))],
+ itins.rr>;
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int> {
+ Intrinsic V2F64Int, OpndItins itins> {
def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ [(set VR256:$dst, (V2F64Int VR256:$src))],
+ itins.rr>;
def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
+ itins.rm>;
}
let Predicates = [HasAVX] in {
@@ -2980,31 +3170,40 @@ let Predicates = [HasAVX] in {
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
+ SSE_SQRTP>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
+ SSE_SQRTP>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
+ SSE_SQRTP>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
+ SSE_SQRTP>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
+ SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
+ SSE_SQRTP>, VEX;
defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
+ SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
+ SSE_RCPP>, VEX;
}
+let AddedComplexity = 1 in {
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3027,8 +3226,9 @@ def : Pat<(f32 (X86frcp FR32:$src)),
def : Pat<(f32 (X86frcp (load addr:$src))),
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX, OptForSize]>;
+}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(VSQRTSSr (f32 (IMPLICIT_DEF)),
@@ -3063,21 +3263,26 @@ let Predicates = [HasAVX] in {
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ SSE_SQRTS>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+ SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
// There is no f64 version of the reciprocal approximation instructions.
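The refinement mentioned above is typically one Newton-Raphson step on the roughly 12-bit RCPPS/RSQRTPS estimate; for the reciprocal, x1 = x0 * (2 - a*x0). A small sketch with SSE intrinsics (the helper is illustrative only, not part of this patch):

    #include <xmmintrin.h>

    /* One Newton-Raphson step on the coarse RCPPS estimate of 1/a. */
    static __m128 refined_recip(__m128 a) {
      __m128 x0 = _mm_rcp_ps(a);   /* rcpps: ~12-bit estimate of 1/a */
      return _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(a, x0)));
    }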
@@ -3090,24 +3295,22 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
(VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
@@ -3116,23 +3319,21 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
}
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -3145,19 +3346,18 @@ def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
(MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
@@ -3165,11 +3365,13 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti{q}\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
}
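MOVNTPS/MOVNTDQ are write-combining stores that bypass the cache; their memory operands must be 16-byte aligned (hence the alignednontemporalstore fragment above), and the weakly-ordered stores are normally followed by a fence. A minimal streaming-store sketch (function name and sizes are illustrative):

    #include <xmmintrin.h>

    /* dst must be 16-byte aligned and n a multiple of 4. */
    static void stream_zeros(float *dst, int n) {
      __m128 z = _mm_setzero_ps();
      for (int i = 0; i < n; i += 4)
        _mm_stream_ps(dst + i, z);   /* movntps */
      _mm_sfence();                  /* order the non-temporal stores */
    }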
@@ -3178,31 +3380,40 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+let Predicates = [HasSSE1] in {
+def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+}
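The (i32 3) through (i32 0) locality operands in these patterns correspond to prefetcht0, prefetcht1, prefetcht2 and prefetchnta. From C the same hints are reachable through _mm_prefetch; illustrative usage (helper name and offset are made up for the example):

    #include <xmmintrin.h>

    static void touch(const float *p) {
      _mm_prefetch((const char *)p, _MM_HINT_T0);            /* locality 3 -> prefetcht0 */
      _mm_prefetch((const char *)(p + 256), _MM_HINT_NTA);   /* locality 0 -> prefetchnta */
    }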
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
+ IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
- "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+ "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
+ TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+ "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
+ TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+ "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
+ TB, Requires<[HasSSE2]>;
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3213,14 +3424,18 @@ def : Pat<(X86MFence), (MFENCE)>;
//===----------------------------------------------------------------------===//
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, VEX;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, VEX;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3230,108 +3445,134 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
let neverHasSideEffects = 1 in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX;
}
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX;
// For Disassembler
let isCodeGenOnly = 1 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>,
+ VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>,
+ VEX;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX;
let Predicates = [HasAVX] in {
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX;
}
}
let mayStore = 1 in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX;
let Predicates = [HasAVX] in {
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX;
}
}
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- []>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
// For Disassembler
let isCodeGenOnly = 1 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- []>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
+ IIC_SSE_MOVA_P_RM>;
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
+ IIC_SSE_MOVU_P_RM>,
XS, Requires<[HasSSE2]>;
}
let mayStore = 1 in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVA_P_MR>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVU_P_MR>,
XS, Requires<[HasSSE2]>;
}
// Intrinsic forms of MOVDQU load and store
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVU_P_MR>,
XS, VEX, Requires<[HasAVX]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVU_P_MR>,
XS, Requires<[HasSSE2]>;
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
- def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
(VMOVDQUYmr addr:$dst, VR256:$src)>;
}
@@ -3340,178 +3581,326 @@ let Predicates = [HasAVX] in {
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+def SSE_PMADD : OpndItins<
+ IIC_SSE_PMADD, IIC_SSE_PMADD
+>;
+
let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit IsCommutable = 0, bit Is2Addr = 1> {
+ RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0,
+ bit Is2Addr = 1> {
let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
-}
-
-multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, Intrinsic IntId,
- Intrinsic IntId2, bit Is2Addr = 1> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
+ itins.rm>;
+}
+
+multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, RegisterClass RC,
+ ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
+ ShiftOpndItins itins,
+ bit Is2Addr = 1> {
+ // src2 is always 128-bit
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
-}
-
-/// PDI_binop_rm - Simple SSE2 binary operator.
-multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
+ itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2)))))]>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
}
-/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
-///
-/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
-/// to collapse (bitconvert VT to VT) into its operand.
-///
-multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit IsCommutable = 0, bit Is2Addr = 1> {
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
+multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))]>;
}
-
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
+ VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
+ VR128, memopv2i64, i128mem,
+ SSE_PMADD, 1, 0>, VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
+ i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
+ i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
+ i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
- VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
- VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
- VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
- VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
- VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
- VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
- VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
- VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
- VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
- VEX_4V;
-defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
- VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
- VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
- VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
- VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
- VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
- VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
- VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
- VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
- VEX_4V;
+defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
+ VR256, memopv4i64, i256mem,
+ SSE_PMADD, 1, 0>, VEX_4V;
+defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
-defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P>;
+defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
-defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ VR128, memopv2i64, i128mem,
+ SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
} // Constraints = "$src1 = $dst"
@@ -3520,145 +3909,176 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
-defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
- VEX_4V;
-defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
- VEX_4V;
-defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
- VEX_4V;
-
-defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
- VEX_4V;
-defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
- VEX_4V;
-defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
- VEX_4V;
-
-defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
- VEX_4V;
-defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
- VEX_4V;
-
-defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
- let neverHasSideEffects = 1 in {
- // 128-bit logical shifts.
- def VPSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
- def VPSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
- // PSRADQri doesn't exist in SSE[1-3].
- }
- def VPANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX]
+
+let Predicates = [HasAVX2] in {
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
- def VPANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86andnp VR128:$src1,
- (memopv2i64 addr:$src2)))]>, VEX_4V;
-}
-}
+let ExeDomain = SSEPackedInt in {
+ // 256-bit logical shifts.
+ def VPSLLDQYri : PDIi8<0x73, MRM7r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V;
+ def VPSRLDQYri : PDIi8<0x73, MRM3r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V;
+ // PSRADQYri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX2]
let Constraints = "$src1 = $dst" in {
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
-
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
-
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
-
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
let ExeDomain = SSEPackedInt in {
- let neverHasSideEffects = 1 in {
- // 128-bit logical shifts.
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}", []>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}", []>;
- // PSRADQri doesn't exist in SSE[1-3].
- }
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
-
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ // PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
- def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  // Shift up / down and insert zeros.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
+ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
+ (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
- def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  // Shift up / down and insert zeros.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
//===---------------------------------------------------------------------===//
@@ -3666,100 +4086,106 @@ let Predicates = [HasSSE2] in {
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
- 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
- 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
- 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
- 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
- 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
- 0>, VEX_4V;
-
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (VPCMPEQBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (VPCMPEQWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (VPCMPEQDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (VPCMPGTBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (VPCMPGTWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (VPCMPGTDrm VR128:$src1, addr:$src2)>;
+ defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
- defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
- defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
- defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
- defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
- defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+ defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+ defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+ defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
} // Constraints = "$src1 = $dst"
-let Predicates = [HasSSE2] in {
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (PCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (PCMPEQBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (PCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (PCMPEQWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (PCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (PCMPEQDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (PCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (PCMPGTBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (PCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (PCMPGTWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (PCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (PCMPGTDrm VR128:$src1, addr:$src2)>;
-}
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
} // Constraints = "$src1 = $dst"
//===---------------------------------------------------------------------===//
@@ -3767,103 +4193,75 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
- PatFrag bc_frag> {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+}
+
+multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
+def Yri : Ii8<0x70, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
- (undef))))]>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
+def Ymi : Ii8<0x70, MRMSrcMem,
+ (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (pshuf_frag:$src2
- (bc_frag (memopv2i64 addr:$src1)),
- (undef))))]>;
+ [(set VR256:$dst,
+ (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))]>;
}
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize,
- VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
- VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
- VEX;
-
- let AddedComplexity = 5 in
- def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
- // Unary v4f32 shuffle with VPSHUF* in order to fold a load.
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
-
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (VPSHUFHWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (VPSHUFHWmi addr:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (VPSHUFLWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (VPSHUFLWmi addr:$src, imm:$imm)>;
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
+
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
+ defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
+ defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
}
let Predicates = [HasSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
-
- let AddedComplexity = 5 in
- def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
- // Unary v4f32 shuffle with PSHUF* in order to fold a load.
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
-
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (PSHUFHWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (PSHUFLWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -3878,7 +4276,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
+ IIC_SSE_UNPCK>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -3886,96 +4285,104 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
- addr:$src2))))]>;
+ addr:$src2))))],
+ IIC_SSE_UNPCK>;
+}
+
+multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
+ SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : PDI<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ def Yrm : PDI<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64, 0>, VEX_4V;
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
+ bc_v32i8>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
+ bc_v16i16>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
+ bc_v8i32>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
+ bc_v4i64>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
+ bc_v32i8>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
+ bc_v16i16>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
+ bc_v8i32>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
+ bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
-
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
+ bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
+ bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
+ bc_v4i32>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
+ bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
+ bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
+ bc_v4i32>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
-// Splat v2f64 / v2i64
-let AddedComplexity = 10 in {
- def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
- def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+// Patterns for using AVX1 instructions with integer vectors
+// Here to give AVX2 priority
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
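The two comment lines above state the intent of this block: AVX1 has no 256-bit integer punpck forms, so 256-bit integer unpacks are matched to the corresponding FP unpack instructions, and the block sits after the AVX2 definitions so the native AVX2 forms win when they are available. A hedged intrinsics-level sketch of the same idea (the function names and <immintrin.h> usage are illustrative assumptions, not from the patch):

#include <immintrin.h>

// Native 256-bit integer unpack, only available with AVX2 (vpunpckldq).
static __m256i unpacklo32_avx2(__m256i a, __m256i b) {
  return _mm256_unpacklo_epi32(a, b);
}

// AVX1-only fallback: run vunpcklps over the same bits, which interleaves
// the 32-bit lanes identically within each 128-bit half.
static __m256i unpacklo32_avx1(__m256i a, __m256i b) {
  return _mm256_castps_si256(
      _mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
}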
//===---------------------------------------------------------------------===//
@@ -3991,7 +4398,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4000,7 +4407,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>;
+ imm:$src3))], IIC_SSE_PINSRW>;
}
// Extract
@@ -4014,7 +4421,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>;
+ imm:$src2))], IIC_SSE_PEXTRW>;
// Insert
let Predicates = [HasAVX] in {
@@ -4038,12 +4445,23 @@ let ExeDomain = SSEPackedInt in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>, VEX;
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
+
+let Predicates = [HasAVX2] in {
+def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
+def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>;
} // ExeDomain = SSEPackedInt
@@ -4057,21 +4475,25 @@ let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>, VEX;
let Uses = [RDI] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>, VEX;
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>;
let Uses = [RDI] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>;
} // ExeDomain = SSEPackedInt
@@ -4085,54 +4507,65 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ VEX;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>,
VEX;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>;
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>;
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>,
VEX;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>;
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4140,20 +4573,22 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))]>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ VEX;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))]>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
@@ -4161,13 +4596,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>,
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>,
TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>;
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4179,36 +4616,45 @@ def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
VEX;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
+ IIC_SSE_MOVDQ>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVD_ToGP>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>;
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
@@ -4217,23 +4663,26 @@ let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))]>,
- VEX;
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>,
VEX, VEX_W;
}
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))]>;
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>;
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))]>;
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>;
}
let AddedComplexity = 20 in {
@@ -4241,29 +4690,19 @@ def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))]>,
- VEX;
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>, VEX;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))]>;
-}
-
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>;
}
let Predicates = [HasAVX] in {
 // AVX 128-bit movd/movq instructions write zeros in the high 128 bits.
let AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -4278,6 +4717,13 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
@@ -4309,7 +4755,8 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
+ IIC_SSE_MOVDQ>, XS,
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
//===---------------------------------------------------------------------===//
@@ -4318,11 +4765,13 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
@@ -4332,14 +4781,16 @@ def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVDQ>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>,
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in
@@ -4347,9 +4798,19 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>,
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
XS, Requires<[HasSSE2]>;
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
+}
+
let Predicates = [HasSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@@ -4358,13 +4819,11 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
}
-let Predicates = [HasAVX], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
- (VMOVZQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVZQI2PQIrm addr:$src)>;
+let Predicates = [HasAVX] in {
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}
//===---------------------------------------------------------------------===//
@@ -4374,51 +4833,58 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
XS, Requires<[HasSSE2]>;
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))]>,
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))]>,
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
XS, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
- let Predicates = [HasSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
- (MOVZPQILo2PQIrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>;
- }
let Predicates = [HasAVX] in {
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(VMOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
+ let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
}
// Instructions to match in the assembler
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
// Recognize "movd" with GR64 destination, but encode as a "movq"
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
// Instructions for the disassembler
// xr = XMM register
@@ -4428,7 +4894,7 @@ let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, XS;
+ "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
//===---------------------------------------------------------------------===//
// SSE3 - Conversion Instructions
@@ -4458,14 +4924,16 @@ def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
}
def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTPD2DQYrr VR256:$src)>;
+ (VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
- (VCVTPD2DQYrm addr:$src)>;
+ (VCVTTPD2DQYrm addr:$src)>;
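The pattern change just above is a semantic fix: fp_to_sint truncates toward zero (C cast semantics), which is what vcvttpd2dq implements, whereas vcvtpd2dq rounds according to the current MXCSR rounding mode. A small sketch, assuming the usual <immintrin.h> intrinsics (illustration only, not part of the patch):

#include <immintrin.h>

// (int)x truncates, so lowering fp_to_sint must use the truncating form.
static __m128i double4_to_int4_trunc(__m256d v) {
  return _mm256_cvttpd_epi32(v);   // vcvttpd2dq
}

// The non-truncating form honours the MXCSR rounding mode instead.
static __m128i double4_to_int4_round(__m256d v) {
  return _mm256_cvtpd_epi32(v);    // vcvtpd2dq
}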
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
@@ -4480,14 +4948,16 @@ def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
// AVX 256-bit register conversion intrinsics
def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
(VCVTDQ2PDYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
@@ -4497,7 +4967,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
//===---------------------------------------------------------------------===//
@@ -4508,10 +4978,12 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
X86MemOperand x86memop> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (OpNode RC:$src)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src)))],
+ IIC_SSE_MOV_LH>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
+ IIC_SSE_MOV_LH>;
}
let Predicates = [HasAVX] in {
@@ -4529,17 +5001,6 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
memopv4f32, f128mem>;
-let Predicates = [HasSSE3] in {
- def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
- (MOVSHDUPrm addr:$src)>;
- def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
- (MOVSLDUPrm addr:$src)>;
-}
-
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
@@ -4559,82 +5020,60 @@ let Predicates = [HasAVX] in {
(VMOVSLDUPYrm addr:$src)>;
}
+let Predicates = [HasSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_dfp<string OpcodeStr> {
+let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+ [], IIC_SSE_MOV_LH>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
- (undef))))]>;
+ (v2f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))],
+ IIC_SSE_MOV_LH>;
}
 // FIXME: Merge with the class above when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (v4f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))]>;
+}
+
let Predicates = [HasAVX] in {
- def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
- def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
- }
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
-defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
-
-let Predicates = [HasSSE3] in {
- def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (MOVDDUPrm addr:$src)>;
- let AddedComplexity = 5 in {
- def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>;
- }
- def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
-}
let Predicates = [HasAVX] in {
- def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (VMOVDDUPrm addr:$src)>;
- let AddedComplexity = 5 in {
- def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (VMOVDDUPrm addr:$src)>;
- }
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
@@ -4644,16 +5083,24 @@ let Predicates = [HasAVX] in {
(VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (memopv4i64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4f64 VR256:$src)),
- (VMOVDDUPYrr VR256:$src)>;
def : Pat<(X86Movddup (v4i64 VR256:$src)),
(VMOVDDUPYrr VR256:$src)>;
}
+let Predicates = [HasSSE3] in {
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
@@ -4668,45 +5115,51 @@ let Predicates = [HasAVX] in {
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
+ IIC_SSE_LDDQU>;
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
}
-let Predicates = [HasAVX],
- ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, TB, OpSize, VEX_4V;
- defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, TB, OpSize, VEX_4V;
-}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
- ExeDomain = SSEPackedDouble in {
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ }
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+ let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
- f128mem>, TB, XD;
+ f128mem, SSE_ALU_F32P>, TB, XD;
+ let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
- f128mem>, TB, OpSize;
+ f128mem, SSE_ALU_F64P>, TB, OpSize;
}
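For reference, ADDSUBPS/ADDSUBPD subtract in the even lanes and add in the odd lanes; the single- and double-precision variants above are now tagged with their own execution domains and itineraries. A minimal sketch of the lane behaviour, assuming <immintrin.h> (the helper name is illustrative, not from the patch):

#include <immintrin.h>

// addsubps: r[0]=a[0]-b[0], r[1]=a[1]+b[1], r[2]=a[2]-b[2], r[3]=a[3]+b[3]
static __m128 addsub_ps(__m128 a, __m128 b) {
  return _mm_addsub_ps(a, b);
}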
//===---------------------------------------------------------------------===//
@@ -4720,13 +5173,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4734,39 +5188,48 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>;
}
let Predicates = [HasAVX] in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ let ExeDomain = SSEPackedSingle in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ }
}
//===---------------------------------------------------------------------===//
@@ -4776,11 +5239,11 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
OpSize;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
@@ -4788,32 +5251,101 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
+ OpSize;
+}
+
+/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId256 VR256:$src))]>,
+ OpSize;
+
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId256
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+let Predicates = [HasAVX2] in {
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
+ int_x86_avx2_pabs_b>, VEX;
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
+ int_x86_avx2_pabs_w>, VEX;
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
+ int_x86_avx2_pabs_d>, VEX;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+def SSE_PHADDSUBD : OpndItins<
+ IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
+>;
+def SSE_PHADDSUBSW : OpndItins<
+ IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
+>;
+def SSE_PHADDSUBW : OpndItins<
+ IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
+>;
+def SSE_PSHUFB : OpndItins<
+ IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
+>;
+def SSE_PSIGN : OpndItins<
+ IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
+>;
+def SSE_PMULHRSW : OpndItins<
+ IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
+>;
+
+/// SS3I_binop_rm - Simple SSSE3 bin op
+multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ OpSize;
+ def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+}
+
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
+ Intrinsic IntId128, OpndItins itins,
bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
@@ -4830,94 +5362,134 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+}
+
+multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
+ OpSize;
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
- int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
- int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
- int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
- int_x86_ssse3_psign_d_128, 0>, VEX_4V;
-}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128,
+ SSE_PMADD, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW, 0>, VEX_4V;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX2] in {
+let isCommutable = 0 in {
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
+ int_x86_avx2_phadd_sw>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
+ int_x86_avx2_phsub_sw>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
+ int_x86_avx2_pmadd_ub_sw>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
+ int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
- int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
- int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
- int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
- int_x86_ssse3_psign_d_128>;
-}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128>;
-}
-
-let Predicates = [HasSSSE3] in {
- def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>;
- def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>;
-
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
-}
-
-let Predicates = [HasAVX] in {
- def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (VPSHUFBrr128 VR128:$src, VR128:$mask)>;
- def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (VPSHUFBrm128 VR128:$src, addr:$mask)>;
-
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSHUFB>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW>;
}
//===---------------------------------------------------------------------===//
@@ -4925,36 +5497,57 @@ let Predicates = [HasAVX] in {
//===---------------------------------------------------------------------===//
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize;
+ let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [], IIC_SSE_PALIGNR>, OpSize;
+ }
+}
+
+multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
+ def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ }
}
let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+let Predicates = [HasAVX2] in
+ defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
-let Predicates = [HasSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
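+// The selection patterns below deliberately swap $src1 and $src2: the
+// X86PAlign node takes its inputs in the opposite order from the palignr
+// instruction's operands.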
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
@@ -4968,23 +5561,36 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
+let Predicates = [HasSSSE3] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
//===---------------------------------------------------------------------===//
// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
+ Requires<[HasSSE3]>;
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
+ Requires<[HasSSE3]>;
}
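+// The pseudos above are expanded by a custom inserter, which is expected to
+// move their operands into the fixed registers the real monitor/mwait read.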
let Uses = [EAX, ECX, EDX] in
-def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
- Requires<[HasSSE3]>;
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
+ TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
-def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
- Requires<[HasSSE3]>;
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
+ TB, Requires<[HasSSE3]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -5010,6 +5616,17 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+}
+
let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
VEX;
@@ -5025,6 +5642,21 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
+ int_x86_avx2_pmovsxbw>, VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
+ int_x86_avx2_pmovsxwd>, VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
+ int_x86_avx2_pmovsxdq>, VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
+ int_x86_avx2_pmovzxbw>, VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
+ int_x86_avx2_pmovzxwd>, VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
+ int_x86_avx2_pmovzxdq>, VEX;
+}
+
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
@@ -5032,70 +5664,80 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
+ (VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
+ (VPMOVZXDQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
+ (PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+}
+
+let Predicates = [HasSSE41] in {
+def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
@@ -5111,6 +5753,19 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
VEX;
@@ -5122,35 +5777,46 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
+ int_x86_avx2_pmovsxbd>, VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
+ int_x86_avx2_pmovsxwq>, VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
+ int_x86_avx2_pmovzxbd>, VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
+ int_x86_avx2_pmovzxwq>, VEX;
+}
+
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>;
+ (VPMOVSXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>;
+ (VPMOVSXWQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>;
+ (VPMOVZXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>;
+ (VPMOVZXWQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVSXBDrm addr:$src)>;
+ (PMOVSXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVSXWQrm addr:$src)>;
+ (PMOVSXWQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVZXBDrm addr:$src)>;
+ (PMOVZXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVZXWQrm addr:$src)>;
+ (PMOVZXWQrm addr:$src)>;
}
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -5166,39 +5832,59 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+  // Expecting an i16 load any-extended to an i32 value.
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
+ int_x86_avx2_pmovsxbq>, VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
+ int_x86_avx2_pmovzxbq>, VEX;
+}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>;
+ (VPMOVSXBQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>;
+ (VPMOVZXBQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXBQrm addr:$src)>;
+ (PMOVSXBQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVZXBQrm addr:$src)>;
+ (PMOVZXBQrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -5213,6 +5899,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -5235,6 +5922,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -5311,26 +5999,28 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
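+// extractps moves single-precision data, so tag it for the SSEPackedSingle
+// execution domain used by the domain-fixing pass.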
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+ }
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
-defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
- (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
- (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasAVX]>;
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -5439,17 +6129,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let Predicates = [HasAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
-
-def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
- Requires<[HasAVX]>;
-def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
- Requires<[HasSSE41]>;
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+ let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
@@ -5459,6 +6144,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag32, PatFrag mem_frag64,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
+let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
@@ -5469,15 +6155,16 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm : SS4AIi8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
+ OpSize;
+} // ExeDomain = SSEPackedSingle
+let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
@@ -5494,46 +6181,26 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
[(set RC:$dst,
(V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
-}
-
-multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
- RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_AVX : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TA, OpSize, Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+} // ExeDomain = SSEPackedDouble
}
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
- // Intrinsic operation, reg.
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -5555,8 +6222,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
- // Intrinsic operation, reg.
+ // Operation, reg.
def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -5577,37 +6254,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
-}
-
-multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, reg.
- def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+} // ExeDomain = GenericDomain
}
// FP round - roundss, roundps, roundsd, roundpd
@@ -5625,12 +6272,26 @@ let Predicates = [HasAVX] in {
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
- // Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
- VEX;
- defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
- VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
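+  // Rounding-control immediates used below: bits 1:0 pick the mode when bit 2
+  // is clear (01 = toward -inf, 10 = toward +inf, 11 = truncate), bit 2 selects
+  // the current MXCSR rounding mode, and bit 3 suppresses the precision
+  // (inexact) exception. Hence 0x1 = ffloor, 0x2 = fceil, 0x3 = ftrunc,
+  // 0x4 = frint, and 0xC = fnearbyint.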
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -5640,6 +6301,27 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -5649,11 +6331,11 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -5668,12 +6350,12 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize;
}
@@ -5690,11 +6372,15 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
@@ -5743,7 +6429,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -5769,15 +6455,29 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+}
+
+/// SS41I_binop_rm_int_y - AVX2 256-bit form of the simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
- defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
- 0>, VEX_4V;
defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
0>, VEX_4V;
defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
@@ -5796,17 +6496,35 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
+}
- def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (VPCMPEQQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
+ int_x86_avx2_pmins_b>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
+ int_x86_avx2_pmins_d>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
+ int_x86_avx2_pminu_d>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
+ int_x86_avx2_pminu_w>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
+ int_x86_avx2_pmaxs_b>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
+ int_x86_avx2_pmaxs_d>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
+ int_x86_avx2_pmaxu_d>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
+ int_x86_avx2_pmaxu_w>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
@@ -5818,36 +6536,46 @@ let Constraints = "$src1 = $dst" in {
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
-
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Is2Addr = 1> {
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
}
-let Predicates = [HasAVX] in
- defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
+let Predicates = [HasAVX] in {
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+}
+let Predicates = [HasAVX2] in {
+ defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+ defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem>;
+}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
@@ -5878,77 +6606,106 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
}
+ let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in {
+ defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
}
+ let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
-let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-}
+ IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
+let Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvpd>;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
+ memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>;
+} // ExeDomain = SSEPackedDouble
+let ExeDomain = SSEPackedSingle in {
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>;
+} // ExeDomain = SSEPackedSingle
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv2i64, int_x86_sse41_pblendvb>;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
+ memopv4i64, int_x86_avx2_pblendvb>;
+}
let Predicates = [HasAVX] in {
def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
@@ -5978,11 +6735,28 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+ def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (imm:$mask))),
+ (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -5996,13 +6770,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
}
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+let ExeDomain = SSEPackedDouble in
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+ int_x86_sse41_blendvpd>;
+let ExeDomain = SSEPackedSingle in
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+ int_x86_sse41_pblendvb>;
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -6020,6 +6799,17 @@ let Predicates = [HasSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+ def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ (imm:$mask))),
+ (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+
}
let Predicates = [HasAVX] in
@@ -6027,6 +6817,11 @@ def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize, VEX;
+let Predicates = [HasAVX2] in
+def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
+ OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -6036,43 +6831,37 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//
-/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
-multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
- def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+/// SS42I_binop_rm - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
OpSize;
- def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
- 0>, VEX_4V;
+let Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
- def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (VPCMPGTQrm VR128:$src1, addr:$src2)>;
-}
+let Predicates = [HasAVX2] in
+ defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
-
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (PCMPGTQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (PCMPGTQrm VR128:$src1, addr:$src2)>;
+ defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -6091,23 +6880,26 @@ multiclass pseudo_pcmpistrm<string asm> {
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
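+  // Both pseudos can match on AVX targets; the added complexity makes the
+  // VEX form win there.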
+ let AddedComplexity = 1 in
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
- defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
@@ -6126,24 +6918,27 @@ multiclass pseudo_pcmpestrm<string asm> {
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ let AddedComplexity = 1 in
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
- defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let Predicates = [HasAVX],
- Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
@@ -6318,8 +7113,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}
// Perform One Round of an AES Encryption/Decryption Flow
@@ -6345,44 +7139,6 @@ let Constraints = "$src1 = $dst" in {
int_x86_aesni_aesdeclast>;
}
-let Predicates = [HasAES] in {
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (AESENCrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (AESENCrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (AESENCLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (AESENCLASTrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (AESDECrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (AESDECrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (AESDECLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (AESDECLASTrm VR128:$src1, addr:$src2)>;
-}
-
-let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (VAESENCrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (VAESENCrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (VAESENCLASTrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (VAESDECrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (VAESDECrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (VAESDECLASTrm VR128:$src1, addr:$src2)>;
-}
-
// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6394,8 +7150,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6407,8 +7162,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
OpSize;
// AES Round Key Generation Assist
@@ -6423,8 +7177,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
- imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -6437,8 +7190,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
- imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
OpSize;
//===----------------------------------------------------------------------===//
@@ -6446,28 +7198,32 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
//===----------------------------------------------------------------------===//
// Carry-less Multiplication instructions
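+// These defs have empty patterns, so spell out their behavior for the
+// machine-level passes: no side effects, and the memory forms may load.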
-let Constraints = "$src1 = $dst" in {
-def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let neverHasSideEffects = 1 in {
+// AVX carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+let mayLoad = 1 in
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-}
-// AVX carry-less Multiplication instructions
-def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
-def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+let mayLoad = 1 in
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
+} // Constraints = "$src1 = $dst"
+} // neverHasSideEffects = 1
multiclass pclmul_alias<string asm, int immop> {
@@ -6506,51 +7262,60 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, VEX;
-def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcastss>;
-def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcastss_256>;
-def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256>;
+// AVX2 adds register forms
+class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ Intrinsic Int> :
+ AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int VR128:$src))]>, VEX;
+
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcast_ss>;
+ def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcast_ss_256>;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
+ int_x86_avx2_vbroadcast_ss_ps>;
+ def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
+ int_x86_avx2_vbroadcast_ss_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
+
+let Predicates = [HasAVX2] in
+def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
+ int_x86_avx2_vbroadcasti128>;
+
+let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
-def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
-
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+}
-def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-
+let Predicates = [HasAVX] in {
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
@@ -6559,11 +7324,11 @@ def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
@@ -6576,18 +7341,54 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
(ins f128mem:$dst, VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+}
+// Extract and store.
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+}
+
+// AVX1 patterns
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -6604,14 +7405,14 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v4f64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2i64 (VEXTRACTF128rr
(v4i64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v8i16 (VEXTRACTF128rr
(v16i16 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
@@ -6619,14 +7420,14 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v16i8 (VEXTRACTF128rr
(v32i8 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256,
- PatFrag pf128, PatFrag pf256> {
+ Intrinsic IntSt, Intrinsic IntSt256> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -6647,26 +7448,26 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
+let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256,
- memopv4f32, memopv8f32>;
+ int_x86_avx_maskstore_ps_256>;
+let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256,
- memopv2f64, memopv4f64>;
+ int_x86_avx_maskstore_pd_256>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
- X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
- Intrinsic IntVar, Intrinsic IntImm> {
+ X86MemOperand x86memop_i, PatFrag i_frag,
+ Intrinsic IntVar, ValueType vt> {
def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -6674,86 +7475,98 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
-}
-
-defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
- int_x86_avx_vpermilvar_ps,
- int_x86_avx_vpermil_ps>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
- int_x86_avx_vpermilvar_ps_256,
- int_x86_avx_vpermil_ps_256>;
-defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2f64, memopv2i64,
- int_x86_avx_vpermilvar_pd,
- int_x86_avx_vpermil_pd>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4f64, memopv4i64,
- int_x86_avx_vpermilvar_pd_256,
- int_x86_avx_vpermil_pd_256>;
-
-def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
- (VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
- (VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ [(set RC:$dst,
+ (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
+}
+
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+
+def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDmi addr:$src1, imm:$imm)>;
+}
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
+let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V;
+}
-def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-
-def : Pat<(int_x86_avx_vperm2f128_ps_256
- VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_pd_256
- VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-
-def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -6770,30 +7583,362 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
-//
+//===----------------------------------------------------------------------===//
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
let Predicates = [HasAVX, HasF16C] in {
- def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
+ def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set RC:$dst, (Int VR128:$src))]>,
+ T8, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+}
+}
+
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+let Predicates = [HasAVX, HasF16C] in {
+ def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ TA, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
+}
+
+defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+
+//===----------------------------------------------------------------------===//
+// AVX2 Instructions
+//===----------------------------------------------------------------------===//
+
+/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ let isCommutable = 1 in
+ def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ VEX_4V;
+ def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ VEX_4V;
}
+
+let isCommutable = 0 in {
+defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
+ VR128, memopv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
+ VR256, memopv4i64, i256mem>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
+multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+}
+
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+ int_x86_avx2_pbroadcastb_128,
+ int_x86_avx2_pbroadcastb_256>;
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+ int_x86_avx2_pbroadcastw_128,
+ int_x86_avx2_pbroadcastw_256>;
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+ int_x86_avx2_pbroadcastd_128,
+ int_x86_avx2_pbroadcastd_256>;
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+ int_x86_avx2_pbroadcastq_128,
+ int_x86_avx2_pbroadcastq_256>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBrm addr:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQYrm addr:$src)>;
+}
+
+// AVX1 broadcast patterns
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPERM - Permute instructions
+//
+
+multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
+ VEX_4V;
+}
+
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+
+multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
+ def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
+ VEX;
+}
+
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
+ VEX_W;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
+ VEX_W;
+
+//===----------------------------------------------------------------------===//
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
+//
+let AddedComplexity = 1 in {
+def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V;
+def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V;
+}
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// VINSERTI128 - Insert packed integer values
+//
+let neverHasSideEffects = 1 in {
+def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>,
+ VEX_4V;
+def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+}
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTI128 - Extract packed integer values
+//
+def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ VEX;
+let neverHasSideEffects = 1, mayStore = 1 in
+def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
+//
+multiclass avx2_pmovmask<string OpcodeStr,
+ Intrinsic IntLd128, Intrinsic IntLd256,
+ Intrinsic IntSt128, Intrinsic IntSt256> {
+ def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
+ def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+ def mr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
+ int_x86_avx2_maskload_d,
+ int_x86_avx2_maskload_d_256,
+ int_x86_avx2_maskstore_d,
+ int_x86_avx2_maskstore_d_256>;
+defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
+ int_x86_avx2_maskload_q,
+ int_x86_avx2_maskload_q_256,
+ int_x86_avx2_maskstore_q,
+ int_x86_avx2_maskstore_q_256>, VEX_W;
+
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128, ValueType vt256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
+ VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1,
+ (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
+ VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1,
+ (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
+ VEX_4V;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td
new file mode 100644
index 000000000000..757dcd0b5dcb
--- /dev/null
+++ b/lib/Target/X86/X86InstrSVM.td
@@ -0,0 +1,62 @@
+//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the AMD SVM instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVM instructions
+
+// 0F 01 D9
+def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
+
+// 0F 01 DC
+def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
+
+// 0F 01 DD
+def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
+
+// 0F 01 DE
+let Uses = [EAX] in
+def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|EAX}", []>, TB;
+
+// 0F 01 D8
+let Uses = [EAX] in
+def VMRUN32 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMRUN64 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DA
+let Uses = [EAX] in
+def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DB
+let Uses = [EAX] in
+def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DF
+let Uses = [EAX, ECX] in
+def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %eax|EAX, ECX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX, ECX] in
+def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %rax|RAX, ECX}", []>, TB, Requires<[In64BitMode]>;
+
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 8278568184ff..bdeb63ffbd69 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -1,10 +1,10 @@
-//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
-//
+//===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the shift and rotate instructions.
@@ -19,44 +19,46 @@ let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (shl GR8:$src1, CL))], IIC_SR>;
def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (shl GR16:$src1, CL))], IIC_SR>, OpSize;
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (shl GR32:$src1, CL))], IIC_SR>;
def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>;
} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>,
+ OpSize;
def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>;
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
- "shl{b}\t$dst", []>;
+ "shl{b}\t$dst", [], IIC_SR>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t$dst", []>, OpSize;
+ "shl{w}\t$dst", [], IIC_SR>, OpSize;
def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t$dst", []>;
+ "shl{l}\t$dst", [], IIC_SR>;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t$dst", []>;
+ "shl{q}\t$dst", [], IIC_SR>;
} // isConvertibleToThreeAddress = 1
} // Constraints = "$src = $dst"
@@ -66,223 +68,266 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
+ OpSize;
def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
"shl{w}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
"shl{l}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
"shl{q}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
"shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
- [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (srl GR8:$src1, CL))], IIC_SR>;
def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, CL))], IIC_SR>, OpSize;
def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (srl GR32:$src1, CL))], IIC_SR>;
def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>;
}
def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"shr{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shr{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>, OpSize;
def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shr{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>;
// Shift right by 1
def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
"shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))], IIC_SR>;
def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))], IIC_SR>, OpSize;
def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))], IIC_SR>;
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
- [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
} // Constraints = "$src = $dst"
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
OpSize;
def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
"shr{w}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
"shr{l}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
"shr{q}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
"shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,OpSize;
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
- [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+ [(set GR8:$dst, (sra GR8:$src1, CL))],
+ IIC_SR>;
def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (sra GR16:$src1, CL))],
+ IIC_SR>, OpSize;
def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+ [(set GR32:$dst, (sra GR32:$src1, CL))],
+ IIC_SR>;
def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))],
+ IIC_SR>;
}
def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"sar{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"sar{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"sar{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Shift by 1
def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))],
+ IIC_SR>;
def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))],
+ IIC_SR>;
def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
- [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src = $dst"
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
"sar{w}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
"sar{l}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
"sar{q}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
"sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t$dst",
- [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
//===----------------------------------------------------------------------===//
// Rotate instructions
@@ -290,125 +335,125 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
let Constraints = "$src1 = $dst" in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t$dst", []>;
+ "rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t$dst", []>, OpSize;
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
let Uses = [CL] in
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t$dst", []>;
+ "rcl{l}\t$dst", [], IIC_SR>;
def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t$dst", []>;
+ "rcl{q}\t$dst", [], IIC_SR>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t$dst", []>;
+ "rcr{b}\t$dst", [], IIC_SR>;
def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t$dst", []>, OpSize;
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
let Uses = [CL] in
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t$dst", []>;
+ "rcr{l}\t$dst", [], IIC_SR>;
def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t$dst", []>;
+ "rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
} // Constraints = "$src = $dst"
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t$dst", []>;
+ "rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t$dst", []>, OpSize;
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t$dst", []>;
+ "rcl{l}\t$dst", [], IIC_SR>;
def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t$dst", []>;
+ "rcl{q}\t$dst", [], IIC_SR>;
def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t$dst", []>;
+ "rcr{b}\t$dst", [], IIC_SR>;
def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t$dst", []>, OpSize;
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t$dst", []>;
+ "rcr{l}\t$dst", [], IIC_SR>;
def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t$dst", []>;
+ "rcr{q}\t$dst", [], IIC_SR>;
def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
let Constraints = "$src1 = $dst" in {
@@ -416,179 +461,217 @@ let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, CL))], IIC_SR>;
def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (rotl GR16:$src1, CL))], IIC_SR>, OpSize;
def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, CL))], IIC_SR>;
def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+ "rol{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>;
}
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Rotate by 1
def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))],
+ IIC_SR>;
def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))],
+ IIC_SR>;
def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src = $dst"
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+                  "rol{q}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
"rol{w}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
"rol{l}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
"rol{q}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
// Rotate by 1
def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
"rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, CL))], IIC_SR>;
def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (rotr GR16:$src1, CL))], IIC_SR>, OpSize;
def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, CL))], IIC_SR>;
def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>;
}
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))],
+ IIC_SR>;
def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))],
+ IIC_SR>;
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src = $dst"
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
"ror{w}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
"ror{l}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
"ror{q}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
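The IIC_SR and IIC_SHD*_* tokens being threaded through this file are instruction-itinerary classes; the patch assumes they are declared in the X86 scheduling description, which is not shown in this hunk. A minimal sketch of what such declarations look like, with the names taken from the patch itself:

def IIC_SR           : InstrItinClass;
def IIC_SHD16_REG_IM : InstrItinClass;
def IIC_SHD16_REG_CL : InstrItinClass;
def IIC_SHD16_MEM_IM : InstrItinClass;
def IIC_SHD16_MEM_CL : InstrItinClass;
// ...and likewise for the 32- and 64-bit SHD variants.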
//===----------------------------------------------------------------------===//
@@ -601,30 +684,36 @@ let Uses = [CL] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
TB, OpSize;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
TB, OpSize;
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
TB;
def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
TB;
}
@@ -634,42 +723,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
}
} // Constraints = "$src = $dst"
@@ -678,69 +767,110 @@ let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
}
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
TB, OpSize;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
TB, OpSize;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
TB;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
TB;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
TB;
} // Defs = [EFLAGS]
+multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX;
+ let mayLoad = 1 in
+ def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX;
+}
+}
+
+multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3;
+ let mayLoad = 1 in
+ def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
+ defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, VEX_W;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+}
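For readers less used to TableGen multiclasses, here is a hand-expanded sketch of what one of the BMI2 defm lines above produces, obtained by substituting into the bmi_shift body (record names follow the usual defm-prefix convention; the real records also carry the neverHasSideEffects/mayLoad flags from the enclosing lets):

def SHRX32rr : I<0xF7, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                 "shrx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4VOp3, T8XD;
def SHRX32rm : I<0xF7, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src1, GR32:$src2),
                 "shrx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4VOp3, T8XD;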
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 05a5b36b95ed..bddba6cb0c4d 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -1,10 +1,10 @@
-//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
-//
+//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
@@ -45,18 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
-def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
- Requires<[In32BitMode]>;
-def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
Requires<[In64BitMode]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
Requires<[In64BitMode]>;
@@ -215,18 +214,18 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB, OpSize;
+ "str{w}\t$dst", []>, TB, OpSize;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
- "str{l}\t{$dst}", []>, TB;
+ "str{l}\t$dst", []>, TB;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
- "str{q}\t{$dst}", []>, TB;
+ "str{q}\t$dst", []>, TB;
def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
+ "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t{$src}", []>, TB;
+ "ltr{w}\t$src", []>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t{$src}", []>, TB;
+ "ltr{w}\t$src", []>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
"push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
@@ -447,21 +446,38 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
-let Predicates = [In64BitMode] in {
+let Predicates = [HasFSGSBase, In64BitMode] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
- "rdfsbase{l}\t$dst", []>, TB, XS;
+ "rdfsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
- "rdfsbase{q}\t$dst", []>, TB, XS;
+ "rdfsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
- "rdgsbase{l}\t$dst", []>, TB, XS;
+ "rdgsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
- "rdgsbase{q}\t$dst", []>, TB, XS;
- def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst),
- "wrfsbase{l}\t$dst", []>, TB, XS;
- def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst),
- "wrfsbase{q}\t$dst", []>, TB, XS;
- def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst),
- "wrgsbase{l}\t$dst", []>, TB, XS;
- def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst),
- "wrgsbase{q}\t$dst", []>, TB, XS;
+ "rdgsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
+ "wrfsbase{l}\t$src",
+ [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
+ "wrfsbase{q}\t$src",
+ [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
+ "wrgsbase{l}\t$src",
+ [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
+ "wrgsbase{q}\t$src",
+ [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS;
}
+
+//===----------------------------------------------------------------------===//
+// INVPCID Instruction
+def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
+def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
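The new selection patterns above refer to x86 intrinsics by name, so matching declarations must exist on the IR side. A minimal sketch of what those declarations look like (the GCCBuiltin names and the empty property lists are assumptions here, not part of this patch):

let TargetPrefix = "x86" in {
  def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
                            Intrinsic<[llvm_i64_ty], []>;
  def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
                            Intrinsic<[], [llvm_i64_ty]>;
}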
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 09a7a7d0c4d0..6a8f0c848673 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -1,10 +1,10 @@
-//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
-//
+//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
@@ -17,18 +17,24 @@
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 D4
+def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
@@ -38,23 +44,23 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
"vmptrst\t$vmcs", []>, TB;
def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t{$vmxon}", []>, XS;
+ "vmxon\t$vmxon", []>, XS;
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 000000000000..65bbcb55ae12
--- /dev/null
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,307 @@
+//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes XOP (eXtended OPerations)
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
+}
+
+// Scalar load 2 addr operand instructions
+let Constraints = "$src1 = $dst" in {
+multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (bitconvert mem_cpat:$src2)))]>, VEX;
+}
+
+} // Constraints = "$src1 = $dst"
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+ defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
+}
+
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
+ memopv8f32>;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
+ memopv4f64>;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ VEX_4V, VEX_W;
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
+ VEX_4VOp3;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+ defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+ defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+ defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+ defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+ defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+ defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ let mayLoad = 1 in
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ }
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw">;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq">;
+ defm VPROTD : xop3opimm<0xC2, "vprotd">;
+ defm VPROTB : xop3opimm<0xC0, "vprotb">;
+}
+
+// Instruction where second source can be memory, but third must be register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
+}
+
+// Instruction where second source can be memory, third must be imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType VT> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (VT (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (VT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ imm:$src3)))]>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb", X86vpcom, v16i8>;
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw", X86vpcom, v8i16>;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd", X86vpcom, v4i32>;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq", X86vpcom, v2i64>;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub", X86vpcomu, v16i8>;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw", X86vpcomu, v8i16>;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud", X86vpcomu, v4i32>;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", X86vpcomu, v2i64>;
+}
+
+// Instruction where either second or third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+ defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+}
+
+multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
+ Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>;
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>;
+}
+
+defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
+ int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>;
+defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
+ int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>;
+
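As with the shift and rotate multiclasses earlier in the patch, each XOP defm line above is shorthand for a register form and a memory form. A hand-expanded sketch of one of them, substituted from the xop2op body (the real records additionally sit under the isAsmParserOnly = 1 block):

def VFRCZPSrr : IXOP<0x80, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "vfrczps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_xop_vfrcz_ps VR128:$src))]>, VEX;
def VFRCZPSrm : IXOP<0x80, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "vfrczps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                        (int_x86_xop_vfrcz_ps (bitconvert (memopv4f32 addr:$src))))]>, VEX;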
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 3f88fa69d0ee..0168d12231f7 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -300,7 +300,10 @@ extern "C" {
SIZE(X86CompilationCallback_SSE)
);
# else
- void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+      // The following function is called only from this translation unit,
+      // unless we are on 64-bit Windows with MSVC, where there is
+      // no support for inline assembly.
+ static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
_declspec(naked) void X86CompilationCallback(void) {
__asm {
@@ -424,7 +427,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ TsanIgnoreWritesBegin();
JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
if (Subtarget->hasSSE1())
@@ -569,6 +574,5 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
return TLSOffset;
#else
llvm_unreachable("Cannot allocate thread local storage on this arch!");
- return 0;
#endif
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 238420c236b1..c76d3ccf5d94 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -1,4 +1,4 @@
-//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 50bc14d357f8..b578e8d9285d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -26,7 +27,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Type.h"
using namespace llvm;
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
@@ -154,6 +154,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Ctx),
Ctx);
break;
+ case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
@@ -230,7 +231,8 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
/// a short fixed-register form.
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
unsigned ImmOp = Inst.getNumOperands() - 1;
- assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ assert(Inst.getOperand(0).isReg() &&
+ (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
Inst.getNumOperands() == 2) && "Unexpected instruction!");
@@ -335,6 +337,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO,
AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ continue;
}
OutMI.addOperand(MCOp);
@@ -368,14 +373,12 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
+ case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
+ case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -386,14 +389,12 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
- // register inputs modeled as normal uses instead of implicit uses. As such,
- // truncate off all but the first operand (the callee). FIXME: Change isel.
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
+ // inputs modeled as normal uses instead of implicit uses. As such, truncate
+ // off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32:
- case X86::WINCALL64r:
- case X86::WINCALL64pcrel32: {
+ case X86::CALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -415,7 +416,7 @@ ReSimplify:
case X86::TAILJMPd64: {
unsigned Opcode;
switch (OutMI.getOpcode()) {
- default: assert(0 && "Invalid opcode");
+ default: llvm_unreachable("Invalid opcode");
case X86::TAILJMPr: Opcode = X86::JMP32r; break;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
@@ -527,6 +528,22 @@ ReSimplify:
case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+
+ case X86::MORESTACK_RET:
+ OutMI.setOpcode(X86::RET);
+ break;
+
+ case X86::MORESTACK_RET_RESTORE_R10: {
+ MCInst retInst;
+
+ OutMI.setOpcode(X86::MOV64rr);
+ OutMI.addOperand(MCOperand::CreateReg(X86::R10));
+ OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
+
+ retInst.setOpcode(X86::RET);
+ AsmPrinter.OutStreamer.EmitInstruction(retInst);
+ break;
+ }
}
}
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 021007239128..40df3db7d480 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..568dc222d9d1
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void X86MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index b0bb313ec639..c7471091ec47 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,10 +1,10 @@
-//====- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
-//
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file declares X86-specific per-machine-function information.
@@ -21,6 +21,8 @@ namespace llvm {
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
  /// ForceFramePointer - True if the function is required to use a frame
  /// pointer for reasons other than it containing dynamic allocation or
  /// that FP elimination is turned off. For example, Cygwin main function
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c1ac9f343116..b56025fbb09c 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
#include "X86RegisterInfo.h"
+#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -127,121 +127,13 @@ const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B,
unsigned SubIdx) const {
- switch (SubIdx) {
- default: return 0;
- case X86::sub_8bit:
- if (B == &X86::GR8RegClass) {
- if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
- return A;
- } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
- A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::GR8_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_NOREXRegClass;
- else if (A == &X86::GR32_ABCDRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_NOREXRegClass;
- else if (A == &X86::GR16_ABCDRegClass)
- return &X86::GR16_ABCDRegClass;
- }
- break;
- case X86::sub_8bit_hi:
- if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
- switch (A->getSize()) {
- case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
- case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
- case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
- default: return 0;
- }
- break;
- case X86::sub_16bit:
- if (B == &X86::GR16RegClass) {
- if (A->getSize() == 4 || A->getSize() == 8)
- return A;
- } else if (B == &X86::GR16_ABCDRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- } else if (B == &X86::GR16_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_NOREXRegClass;
- else if (A == &X86::GR32_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- }
- break;
- case X86::sub_32bit:
- if (B == &X86::GR32RegClass) {
- if (A->getSize() == 8)
- return A;
- } else if (B == &X86::GR32_NOSPRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
- return &X86::GR64_NOSPRegClass;
- if (A->getSize() == 8)
- return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
- } else if (B == &X86::GR32_ABCDRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::GR32_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREX_NOSPRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREX_NOSPRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- }
- break;
- case X86::sub_ss:
- if (B == &X86::FR32RegClass)
- return A;
- break;
- case X86::sub_sd:
- if (B == &X86::FR64RegClass)
- return A;
- break;
- case X86::sub_xmm:
- if (B == &X86::VR128RegClass)
- return A;
- break;
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ if (!Is64Bit && SubIdx == X86::sub_8bit) {
+ A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
+ if (!A)
+ return 0;
}
- return 0;
+ return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
const TargetRegisterClass*
@@ -334,7 +226,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const unsigned *
+const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
@@ -345,45 +237,29 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
}
- static const unsigned GhcCalleeSavedRegs[] = {
- 0
- };
-
- static const unsigned CalleeSavedRegs32Bit[] = {
- X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
- };
-
- static const unsigned CalleeSavedRegs32EHRet[] = {
- X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
- };
-
- static const unsigned CalleeSavedRegs64Bit[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
-
- static const unsigned CalleeSavedRegs64EHRet[] = {
- X86::RAX, X86::RDX, X86::RBX, X86::R12,
- X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
-
- static const unsigned CalleeSavedRegsWin64[] = {
- X86::RBX, X86::RBP, X86::RDI, X86::RSI,
- X86::R12, X86::R13, X86::R14, X86::R15,
- X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
- X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
- X86::XMM14, X86::XMM15, 0
- };
-
- if (ghcCall) {
- return GhcCalleeSavedRegs;
- } else if (Is64Bit) {
+ if (ghcCall)
+ return CSR_Ghc_SaveList;
+ if (Is64Bit) {
if (IsWin64)
- return CalleeSavedRegsWin64;
- else
- return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
- } else {
- return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ return CSR_Win64_SaveList;
+ if (callsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
}
+ if (callsEHReturn)
+ return CSR_32EHRet_SaveList;
+ return CSR_32_SaveList;
+}
+
+const uint32_t*
+X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::GHC)
+ return CSR_Ghc_RegMask;
+ if (!Is64Bit)
+ return CSR_32_RegMask;
+ if (IsWin64)
+ return CSR_Win64_RegMask;
+ return CSR_64_RegMask;
}
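The CSR_*_RegMask tables used here are TableGen-generated bit vectors with one bit per physical register; a set bit means that register's value is preserved by a call using the given convention. As a rough sketch of how such a mask can be queried (the helper below is hypothetical and not part of this patch):

    #include <cstdint>

    // Test whether PhysReg survives a call, given a mask such as CSR_64_RegMask.
    static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned PhysReg) {
      // Packed bit vector: word PhysReg/32, bit PhysReg%32.
      return Mask[PhysReg / 32] & (1u << (PhysReg % 32));
    }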
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -428,16 +304,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (unsigned n = 0; n != 8; ++n) {
// R8, R9, ...
- const unsigned GPR64[] = {
+ static const uint16_t GPR64[] = {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ for (const uint16_t *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
Reserved.set(Reg);
// XMM8, XMM9, ...
assert(X86::XMM15 == X86::XMM8+7);
- for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ for (const uint16_t *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
++AI)
Reserved.set(Reg);
}
@@ -452,7 +328,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (RealignStack &&
+ return (MF.getTarget().Options.RealignStack &&
!MFI->hasVarSizedObjects());
}
@@ -583,7 +459,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->getDesc().isCall())
+ while (I != B && !llvm::prior(I)->isCall())
--I;
MBB.insert(I, New);
}
@@ -650,12 +526,10 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned X86RegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned X86RegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
namespace llvm {
@@ -665,7 +539,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return 0;
+ default: return getX86SubSuperRegister(Reg, MVT::i64, High);
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -785,6 +659,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15D;
}
case MVT::i64:
+    // In 64-bit mode, if a "high" register was requested (e.g. for the Q
+    // or r constraints), return one of these registers; otherwise fall
+    // through and just return the register name.
+ if (High) {
+ switch (Reg) {
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ // Fallthrough.
+ }
+ }
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
@@ -821,8 +711,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15;
}
}
-
- return Reg;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7d39c6853597..bee03936f1f7 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,7 +95,8 @@ public:
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9a7db36e0871..5263a4934cbd 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -70,7 +70,7 @@ let Namespace = "X86" in {
def BH : Register<"bh">;
// 16-bit registers
- let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 990962dc4173..857becff66d7 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -1,4 +1,4 @@
-//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
new file mode 100644
index 000000000000..17f4efd8bcb0
--- /dev/null
+++ b/lib/Target/X86/X86Schedule.td
@@ -0,0 +1,273 @@
+//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for X86
+def IIC_DEFAULT : InstrItinClass;
+def IIC_ALU_MEM : InstrItinClass;
+def IIC_ALU_NONMEM : InstrItinClass;
+def IIC_LEA : InstrItinClass;
+def IIC_LEA_16 : InstrItinClass;
+def IIC_MUL8 : InstrItinClass;
+def IIC_MUL16_MEM : InstrItinClass;
+def IIC_MUL16_REG : InstrItinClass;
+def IIC_MUL32_MEM : InstrItinClass;
+def IIC_MUL32_REG : InstrItinClass;
+def IIC_MUL64 : InstrItinClass;
+// imul by al, ax, eax, rax
+def IIC_IMUL8 : InstrItinClass;
+def IIC_IMUL16_MEM : InstrItinClass;
+def IIC_IMUL16_REG : InstrItinClass;
+def IIC_IMUL32_MEM : InstrItinClass;
+def IIC_IMUL32_REG : InstrItinClass;
+def IIC_IMUL64 : InstrItinClass;
+// imul reg by reg|mem
+def IIC_IMUL16_RM : InstrItinClass;
+def IIC_IMUL16_RR : InstrItinClass;
+def IIC_IMUL32_RM : InstrItinClass;
+def IIC_IMUL32_RR : InstrItinClass;
+def IIC_IMUL64_RM : InstrItinClass;
+def IIC_IMUL64_RR : InstrItinClass;
+// imul reg = reg/mem * imm
+def IIC_IMUL16_RMI : InstrItinClass;
+def IIC_IMUL16_RRI : InstrItinClass;
+def IIC_IMUL32_RMI : InstrItinClass;
+def IIC_IMUL32_RRI : InstrItinClass;
+def IIC_IMUL64_RMI : InstrItinClass;
+def IIC_IMUL64_RRI : InstrItinClass;
+// div
+def IIC_DIV8_MEM : InstrItinClass;
+def IIC_DIV8_REG : InstrItinClass;
+def IIC_DIV16 : InstrItinClass;
+def IIC_DIV32 : InstrItinClass;
+def IIC_DIV64 : InstrItinClass;
+// idiv
+def IIC_IDIV8 : InstrItinClass;
+def IIC_IDIV16 : InstrItinClass;
+def IIC_IDIV32 : InstrItinClass;
+def IIC_IDIV64 : InstrItinClass;
+// neg/not/inc/dec
+def IIC_UNARY_REG : InstrItinClass;
+def IIC_UNARY_MEM : InstrItinClass;
+// add/sub/and/or/xor/adc/sbc/cmp/test
+def IIC_BIN_MEM : InstrItinClass;
+def IIC_BIN_NONMEM : InstrItinClass;
+// shift/rotate
+def IIC_SR : InstrItinClass;
+// shift double
+def IIC_SHD16_REG_IM : InstrItinClass;
+def IIC_SHD16_REG_CL : InstrItinClass;
+def IIC_SHD16_MEM_IM : InstrItinClass;
+def IIC_SHD16_MEM_CL : InstrItinClass;
+def IIC_SHD32_REG_IM : InstrItinClass;
+def IIC_SHD32_REG_CL : InstrItinClass;
+def IIC_SHD32_MEM_IM : InstrItinClass;
+def IIC_SHD32_MEM_CL : InstrItinClass;
+def IIC_SHD64_REG_IM : InstrItinClass;
+def IIC_SHD64_REG_CL : InstrItinClass;
+def IIC_SHD64_MEM_IM : InstrItinClass;
+def IIC_SHD64_MEM_CL : InstrItinClass;
+// cmov
+def IIC_CMOV16_RM : InstrItinClass;
+def IIC_CMOV16_RR : InstrItinClass;
+def IIC_CMOV32_RM : InstrItinClass;
+def IIC_CMOV32_RR : InstrItinClass;
+def IIC_CMOV64_RM : InstrItinClass;
+def IIC_CMOV64_RR : InstrItinClass;
+// set
+def IIC_SET_R : InstrItinClass;
+def IIC_SET_M : InstrItinClass;
+// jmp/jcc/jcxz
+def IIC_Jcc : InstrItinClass;
+def IIC_JCXZ : InstrItinClass;
+def IIC_JMP_REL : InstrItinClass;
+def IIC_JMP_REG : InstrItinClass;
+def IIC_JMP_MEM : InstrItinClass;
+def IIC_JMP_FAR_MEM : InstrItinClass;
+def IIC_JMP_FAR_PTR : InstrItinClass;
+// loop
+def IIC_LOOP : InstrItinClass;
+def IIC_LOOPE : InstrItinClass;
+def IIC_LOOPNE : InstrItinClass;
+// call
+def IIC_CALL_RI : InstrItinClass;
+def IIC_CALL_MEM : InstrItinClass;
+def IIC_CALL_FAR_MEM : InstrItinClass;
+def IIC_CALL_FAR_PTR : InstrItinClass;
+// ret
+def IIC_RET : InstrItinClass;
+def IIC_RET_IMM : InstrItinClass;
+//sign extension movs
+def IIC_MOVSX : InstrItinClass;
+def IIC_MOVSX_R16_R8 : InstrItinClass;
+def IIC_MOVSX_R16_M8 : InstrItinClass;
+def IIC_MOVSX_R16_R16 : InstrItinClass;
+def IIC_MOVSX_R32_R32 : InstrItinClass;
+//zero extension movs
+def IIC_MOVZX : InstrItinClass;
+def IIC_MOVZX_R16_R8 : InstrItinClass;
+def IIC_MOVZX_R16_M8 : InstrItinClass;
+
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
+// SSE scalar/parallel binary operations
+def IIC_SSE_ALU_F32S_RR : InstrItinClass;
+def IIC_SSE_ALU_F32S_RM : InstrItinClass;
+def IIC_SSE_ALU_F64S_RR : InstrItinClass;
+def IIC_SSE_ALU_F64S_RM : InstrItinClass;
+def IIC_SSE_MUL_F32S_RR : InstrItinClass;
+def IIC_SSE_MUL_F32S_RM : InstrItinClass;
+def IIC_SSE_MUL_F64S_RR : InstrItinClass;
+def IIC_SSE_MUL_F64S_RM : InstrItinClass;
+def IIC_SSE_DIV_F32S_RR : InstrItinClass;
+def IIC_SSE_DIV_F32S_RM : InstrItinClass;
+def IIC_SSE_DIV_F64S_RR : InstrItinClass;
+def IIC_SSE_DIV_F64S_RM : InstrItinClass;
+def IIC_SSE_ALU_F32P_RR : InstrItinClass;
+def IIC_SSE_ALU_F32P_RM : InstrItinClass;
+def IIC_SSE_ALU_F64P_RR : InstrItinClass;
+def IIC_SSE_ALU_F64P_RM : InstrItinClass;
+def IIC_SSE_MUL_F32P_RR : InstrItinClass;
+def IIC_SSE_MUL_F32P_RM : InstrItinClass;
+def IIC_SSE_MUL_F64P_RR : InstrItinClass;
+def IIC_SSE_MUL_F64P_RM : InstrItinClass;
+def IIC_SSE_DIV_F32P_RR : InstrItinClass;
+def IIC_SSE_DIV_F32P_RM : InstrItinClass;
+def IIC_SSE_DIV_F64P_RR : InstrItinClass;
+def IIC_SSE_DIV_F64P_RM : InstrItinClass;
+
+def IIC_SSE_COMIS_RR : InstrItinClass;
+def IIC_SSE_COMIS_RM : InstrItinClass;
+
+def IIC_SSE_HADDSUB_RR : InstrItinClass;
+def IIC_SSE_HADDSUB_RM : InstrItinClass;
+
+def IIC_SSE_BIT_P_RR : InstrItinClass;
+def IIC_SSE_BIT_P_RM : InstrItinClass;
+
+def IIC_SSE_INTALU_P_RR : InstrItinClass;
+def IIC_SSE_INTALU_P_RM : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RR : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RM : InstrItinClass;
+
+def IIC_SSE_INTMUL_P_RR : InstrItinClass;
+def IIC_SSE_INTMUL_P_RM : InstrItinClass;
+
+def IIC_SSE_INTSH_P_RR : InstrItinClass;
+def IIC_SSE_INTSH_P_RM : InstrItinClass;
+def IIC_SSE_INTSH_P_RI : InstrItinClass;
+
+def IIC_SSE_CMPP_RR : InstrItinClass;
+def IIC_SSE_CMPP_RM : InstrItinClass;
+
+def IIC_SSE_SHUFP : InstrItinClass;
+def IIC_SSE_PSHUF : InstrItinClass;
+
+def IIC_SSE_UNPCK : InstrItinClass;
+
+def IIC_SSE_MOVMSK : InstrItinClass;
+def IIC_SSE_MASKMOV : InstrItinClass;
+
+def IIC_SSE_PEXTRW : InstrItinClass;
+def IIC_SSE_PINSRW : InstrItinClass;
+
+def IIC_SSE_PABS_RR : InstrItinClass;
+def IIC_SSE_PABS_RM : InstrItinClass;
+
+def IIC_SSE_SQRTP_RR : InstrItinClass;
+def IIC_SSE_SQRTP_RM : InstrItinClass;
+def IIC_SSE_SQRTS_RR : InstrItinClass;
+def IIC_SSE_SQRTS_RM : InstrItinClass;
+
+def IIC_SSE_RCPP_RR : InstrItinClass;
+def IIC_SSE_RCPP_RM : InstrItinClass;
+def IIC_SSE_RCPS_RR : InstrItinClass;
+def IIC_SSE_RCPS_RM : InstrItinClass;
+
+def IIC_SSE_MOV_S_RR : InstrItinClass;
+def IIC_SSE_MOV_S_RM : InstrItinClass;
+def IIC_SSE_MOV_S_MR : InstrItinClass;
+
+def IIC_SSE_MOVA_P_RR : InstrItinClass;
+def IIC_SSE_MOVA_P_RM : InstrItinClass;
+def IIC_SSE_MOVA_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVU_P_RR : InstrItinClass;
+def IIC_SSE_MOVU_P_RM : InstrItinClass;
+def IIC_SSE_MOVU_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVDQ : InstrItinClass;
+def IIC_SSE_MOVD_ToGP : InstrItinClass;
+def IIC_SSE_MOVQ_RR : InstrItinClass;
+
+def IIC_SSE_MOV_LH : InstrItinClass;
+
+def IIC_SSE_LDDQU : InstrItinClass;
+
+def IIC_SSE_MOVNT : InstrItinClass;
+
+def IIC_SSE_PHADDSUBD_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBD_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RM : InstrItinClass;
+def IIC_SSE_PSHUFB_RR : InstrItinClass;
+def IIC_SSE_PSHUFB_RM : InstrItinClass;
+def IIC_SSE_PSIGN_RR : InstrItinClass;
+def IIC_SSE_PSIGN_RM : InstrItinClass;
+
+def IIC_SSE_PMADD : InstrItinClass;
+def IIC_SSE_PMULHRSW : InstrItinClass;
+def IIC_SSE_PALIGNR : InstrItinClass;
+def IIC_SSE_MWAIT : InstrItinClass;
+def IIC_SSE_MONITOR : InstrItinClass;
+
+def IIC_SSE_PREFETCH : InstrItinClass;
+def IIC_SSE_PAUSE : InstrItinClass;
+def IIC_SSE_LFENCE : InstrItinClass;
+def IIC_SSE_MFENCE : InstrItinClass;
+def IIC_SSE_SFENCE : InstrItinClass;
+def IIC_SSE_LDMXCSR : InstrItinClass;
+def IIC_SSE_STMXCSR : InstrItinClass;
+
+def IIC_SSE_CVT_PD_RR : InstrItinClass;
+def IIC_SSE_CVT_PD_RM : InstrItinClass;
+def IIC_SSE_CVT_PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PS_RM : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RM : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RR : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RR : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+include "X86ScheduleAtom.td"
+
+
+
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
new file mode 100644
index 000000000000..77d4e56d7c03
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -0,0 +1,305 @@
+//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Intel Atom (Bonnell)
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from the "Intel 64 and IA32 Architectures
+// Optimization Reference Manual", Chapter 13, Section 4.
+// Functional Units
+// Port 0
+def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
+                      // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
+def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
+ // SIMD/FP: SIMD ALU, FP Adder
+
+def AtomItineraries : ProcessorItineraries<
+ [ Port0, Port1 ],
+ [], [
+ // P0 only
+ // InstrItinData<class, [InstrStage<N, [P0]>] >,
+ // P0 or P1
+ // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
+ // P0 and P1
+ // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
+ //
+ // Default is 1 cycle, port0 or port1
+ InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
+ // mul
+ InstrItinData<IIC_MUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul by al, ax, eax, rax
+ InstrItinData<IIC_IMUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg by reg|mem
+ InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg = reg/mem * imm
+ InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
+ // idiv
+ InstrItinData<IIC_IDIV8, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV16, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV32, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // div
+ InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV16, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV32, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // neg/not/inc/dec
+ InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
+ // add/sub/and/or/xor/adc/sbc/cmp/test
+ InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
+ // shift/rotate
+ InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
+ // shift double
+ InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
+ // cmov
+ InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
+ // set
+ InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
+ // jcc
+ InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
+ // jcxz/jecxz/jrcxz
+ InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
+ // jmp rel
+ InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
+ // jmp indirect
+ InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ // jmp far
+ InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
+ // loop/loope/loopne
+ InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
+ // call - all but reg/imm
+ InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
+ //ret
+ InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
+ InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+ //sign extension movs
+ InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
+ InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [Port0, Port1]>] >,
+ //zero extension movs
+ InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
+ // SSE binary operations
+ // arithmetic fp scalar
+ InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ // arithmetic fp parallel
+ InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
+
+ // bitwise parallel
+ InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
+
+ // arithmetic int parallel
+ InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+
+ // multiply int parallel
+ InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
+
+ // shift parallel
+ InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+  InstrItinData<IIC_XADD_LOCK_MEM8, [InstrStage<3, [Port0, Port1]>] >
+ ]>;
+
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6406bce31187..9a04e352ab62 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -65,7 +65,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
DAG, dl);
return CallResult.second;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 7064dd06fa30..452dd7eba326 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -177,16 +176,18 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ unsigned MaxLevel;
union {
unsigned u[3];
char c[12];
} text;
-
- if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+
+ if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
+ MaxLevel < 1)
return;
X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
-
+
if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
@@ -196,7 +197,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
+ //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
@@ -244,28 +245,69 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
IsBTMemSlow = true;
ToggleFeature(X86::FeatureSlowBTMem);
}
+
// If it's Nehalem, unaligned memory access is fast.
+ // FIXME: Nehalem is family 6. Also include Westmere and later processors?
if (Family == 15 && Model == 26) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
- X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 29) & 0x1) {
- HasX86_64 = true;
- ToggleFeature(X86::Feature64Bit);
- }
- if ((ECX >> 5) & 0x1) {
- HasLZCNT = true;
- ToggleFeature(X86::FeatureLZCNT);
+ // Set processor type. Currently only Atom is detected.
+ if (Family == 6 && Model == 28) {
+ X86ProcFamily = IntelAtom;
+ ToggleFeature(X86::FeatureLeaForSP);
}
- if (IsAMD && ((ECX >> 6) & 0x1)) {
- HasSSE4A = true;
- ToggleFeature(X86::FeatureSSE4A);
+
+ unsigned MaxExtLevel;
+ X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ if (MaxExtLevel >= 0x80000001) {
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
+ if (IsAMD) {
+ if ((ECX >> 6) & 0x1) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if ((ECX >> 11) & 0x1) {
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
+ }
+ if ((ECX >> 16) & 0x1) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
}
- if (IsAMD && ((ECX >> 16) & 0x1)) {
- HasFMA4 = true;
- ToggleFeature(X86::FeatureFMA4);
+ }
+
+ if (IsIntel && MaxLevel >= 7) {
+ if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
+ if (EBX & 0x1) {
+ HasFSGSBase = true;
+ ToggleFeature(X86::FeatureFSGSBase);
+ }
+ if ((EBX >> 3) & 0x1) {
+ HasBMI = true;
+ ToggleFeature(X86::FeatureBMI);
+ }
+ // FIXME: AVX2 codegen support is not ready.
+ //if ((EBX >> 5) & 0x1) {
+ // X86SSELevel = AVX2;
+ // ToggleFeature(X86::FeatureAVX2);
+ //}
+ if ((EBX >> 8) & 0x1) {
+ HasBMI2 = true;
+ ToggleFeature(X86::FeatureBMI2);
+ }
}
}
}
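Each test above extracts one feature bit from a CPUID result register before toggling the corresponding subtarget feature. The pattern, shown in isolation (the helper name is hypothetical, not part of this patch):

    // Return true if bit 'Bit' of a CPUID output register is set.
    static bool cpuidBitSet(unsigned RegValue, unsigned Bit) {
      return (RegValue >> Bit) & 1;
    }
    // For example, cpuidBitSet(ECX, 5) corresponds to the LZCNT check on
    // leaf 0x80000001, and cpuidBitSet(EBX, 3) to the BMI1 check on leaf 7.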
@@ -274,6 +316,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
, PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
@@ -281,31 +324,35 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasX86_64(false)
, HasPOPCNT(false)
, HasSSE4A(false)
- , HasAVX(false)
, HasAES(false)
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasXOP(false)
, HasMOVBE(false)
, HasRDRAND(false)
, HasF16C(false)
+ , HasFSGSBase(false)
, HasLZCNT(false)
, HasBMI(false)
+ , HasBMI2(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
- , stackAlignment(8)
+ , UseLeaForSP(false)
+ , PostRAScheduler(false)
+ , stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , In64BitMode(is64Bit)
- , InNaClMode(false) {
+ , In64BitMode(is64Bit) {
// Determine default and user specified characteristics
+ std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
- std::string CPUName = CPU;
if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
CPUName = sys::getHostCPUName();
#else
CPUName = "generic";
@@ -325,6 +372,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
// If feature string is not empty, parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
} else {
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
@@ -333,7 +387,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- if (!HasAVX && X86SSELevel < SSE2) {
+ if (X86SSELevel < SSE2) {
X86SSELevel = SSE2;
ToggleFeature(X86::FeatureSSE1);
ToggleFeature(X86::FeatureSSE2);
@@ -341,28 +395,22 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
}
}
+ if (X86ProcFamily == IntelAtom) {
+ PostRAScheduler = true;
+ InstrItins = getInstrItineraryForCPU(CPUName);
+ }
+
// It's important to keep the MCSubtargetInfo feature bits in sync with
// target data structure which is shared with MC code emitter, etc.
if (In64BitMode)
ToggleFeature(X86::Mode64Bit);
- if (isTargetNaCl()) {
- InNaClMode = true;
- ToggleFeature(X86::ModeNaCl);
- }
-
- if (HasAVX)
- X86SSELevel = NoMMXSSE;
-
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- if(EnableSegmentedStacks && !isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
-
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
@@ -371,3 +419,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
isTargetSolaris() || In64BitMode)
stackAlignment = 16;
}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 3258d3d0ada3..7fd832bf0678 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -1,4 +1,4 @@
-//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/CallingConv.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -42,13 +42,20 @@ enum Style {
class X86Subtarget : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2
};
enum X863DNowEnum {
NoThreeDNow, ThreeDNow, ThreeDNowA
};
+ enum X86ProcFamilyEnum {
+ Others, IntelAtom
+ };
+
+ /// X86ProcFamily - X86 processor family: Intel Atom, and others
+ X86ProcFamilyEnum X86ProcFamily;
+
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -75,9 +82,6 @@ protected:
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
- /// HasAVX - Target has AVX instructions
- bool HasAVX;
-
/// HasAES - Target has AES instructions
bool HasAES;
@@ -90,6 +94,9 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -99,12 +106,18 @@ protected:
/// HasF16C - Processor has 16-bit floating point conversion instructions.
bool HasF16C;
+  /// HasFSGSBase - Processor has FS/GS base instructions.
+ bool HasFSGSBase;
+
/// HasLZCNT - Processor has LZCNT instruction.
bool HasLZCNT;
/// HasBMI - Processor has BMI1 instructions.
bool HasBMI;
+ /// HasBMI2 - Processor has BMI2 instructions.
+ bool HasBMI2;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -119,6 +132,13 @@ protected:
/// this is true for most x86-64 chips, but not the first AMD chips.
bool HasCmpxchg16b;
+ /// UseLeaForSP - True if the LEA instruction should be used for adjusting
+ /// the stack pointer. This is an optimization for Intel Atom processors.
+ bool UseLeaForSP;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
  /// stackAlignment - The minimum alignment of the stack frame known to hold
  /// on entry to the function, and which must be maintained by every function.
unsigned stackAlignment;
@@ -129,14 +149,14 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+
+ /// Instruction itineraries for scheduling
+ InstrItineraryData InstrItins;
private:
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
- /// InNaClMode - True if compiling for Native Client target.
- bool InNaClMode;
-
public:
/// This constructor initializes the data members to match that
@@ -176,26 +196,31 @@ public:
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
bool hasSSE41() const { return X86SSELevel >= SSE41; }
bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasAVX() const { return X86SSELevel >= AVX; }
+ bool hasAVX2() const { return X86SSELevel >= AVX2; }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
- bool hasAVX() const { return HasAVX; }
- bool hasXMM() const { return hasSSE1() || hasAVX(); }
- bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
+ bool hasFSGSBase() const { return HasFSGSBase; }
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
+ bool hasBMI2() const { return HasBMI2; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+ bool useLeaForSP() const { return UseLeaForSP; }
+
+ bool isAtom() const { return X86ProcFamily == IntelAtom; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -209,38 +234,28 @@ public:
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const {
- return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
- }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
}
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
-
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
- bool isTargetCygMing() const {
- return isTargetMingw() || isTargetCygwin();
- }
-
- /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
- bool isTargetCOFF() const {
- return isTargetMingw() || isTargetCygwin() || isTargetWindows();
- }
+ bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
+ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
+ bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
bool isTargetWin64() const {
// FIXME: x86_64-cygwin has not been released yet.
- return In64BitMode && (isTargetCygMing() || isTargetWindows());
- }
-
- bool isTargetEnvMacho() const {
- return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ return In64BitMode && TargetTriple.isOSWindows();
}
bool isTargetWin32() const {
+ // FIXME: Cygwin is included for isTargetWin64 -- should it be included
+ // here too?
return !In64BitMode && (isTargetMingw() || isTargetWindows());
}
@@ -286,6 +301,15 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
unsigned getSpecialAddressLatency() const;
+
+  /// enablePostRAScheduler - Enable the post-register-allocation scheduler;
+  /// currently enabled for the Intel Atom.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+  /// getInstrItineraryData - Return the instruction itineraries based on the
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 15c6c4e7a7d2..f4b7a6277ade 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -28,11 +28,14 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
}
+void X86_32TargetMachine::anchor() { }
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -48,11 +51,14 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
JITInfo(*this) {
}
+void X86_64TargetMachine::anchor() { }
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -65,12 +71,15 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
- ELFWriterInfo(is64Bit, true) {
+ ELFWriterInfo(is64Bit, true),
+ InstrItins(Subtarget.getInstrItineraryData()){
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -92,8 +101,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
// default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
@@ -102,46 +111,67 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
static cl::opt<bool>
UseVZeroUpper("x86-use-vzeroupper",
cl::desc("Minimize AVX to SSE transition penalty"),
- cl::init(false));
+ cl::init(true));
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// X86 Code Generator Pass Configuration Options.
+class X86PassConfig : public TargetPassConfig {
+public:
+ X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ X86TargetMachine &getX86TargetMachine() const {
+ return getTM<X86TargetMachine>();
+ }
+
+ const X86Subtarget &getX86Subtarget() const {
+ return *getX86TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new X86PassConfig(this, PM);
+}
+
+bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createX86ISelDag(*this, OptLevel));
+ PM.add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
- if (!Subtarget.is64Bit())
+ if (!getX86Subtarget().is64Bit())
PM.add(createGlobalBaseRegPass());
return false;
}
-bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPreRegAlloc() {
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
-bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPostRegAlloc() {
PM.add(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
- if (OptLevel != CodeGenOpt::None &&
- (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
+ if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
- if (Subtarget.hasAVX() && UseVZeroUpper) {
+ if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
PM.add(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
@@ -150,7 +180,6 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
PM.add(createX86JITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index d1569aa9d751..8e935af67fe3 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -27,19 +27,20 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
-
-class formatted_raw_ostream;
+
class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
- X86Subtarget Subtarget;
- X86FrameLowering FrameLowering;
- X86ELFWriterInfo ELFWriterInfo;
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
public:
- X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
@@ -55,7 +56,7 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
llvm_unreachable("getTargetLowering not implemented");
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
@@ -64,19 +65,21 @@ public:
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
// Set up the pass pipeline.
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
};
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
+ virtual void anchor();
const TargetData DataLayout; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
@@ -84,13 +87,14 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const X86InstrInfo *getInstrInfo() const {
@@ -104,6 +108,7 @@ public:
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
+ virtual void anchor();
const TargetData DataLayout; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
@@ -111,13 +116,14 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const X86InstrInfo *getInstrInfo() const {
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 991f322a6346..718f35ea84ac 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index d7adf27ecaef..a02a36809ef2 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
- class X86TargetMachine;
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 39584942468d..2fd78a7231c6 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -14,14 +14,16 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-codegen"
+#define DEBUG_TYPE "x86-vzeroupper"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -41,6 +43,60 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
MachineBasicBlock *MBB; // Current basic block
+
+ // Any YMM register live-in to this function?
+ bool FnHasLiveInYmm;
+
+ // BBState - Contains the state of each MBB: unknown, clean, dirty
+ SmallVector<uint8_t, 8> BBState;
+
+ // BBSolved - Keep track of all MBBs which have already been analyzed
+ // and need no further processing.
+ BitVector BBSolved;
+
+ // Machine Basic Blocks are classified according to this pass:
+ //
+ // ST_UNKNOWN - The MBB state is unknown, meaning from the entry state
+ // until the MBB exit there isn't an instruction using YMM to change
+ // the state to dirty, or one of the incoming predecessors is unknown
+ // and there's not a dirty predecessor between them.
+ //
+ // ST_CLEAN - No YMM usage at the end of the MBB. An MBB could have
+ // instructions using YMM and be marked ST_CLEAN, as long as the state
+ // is cleaned by a vzeroupper before any call.
+ //
+ // ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
+ // vzeroupper instruction.
+ //
+ // ST_INIT - Placeholder for an empty state set
+ //
+ enum {
+ ST_UNKNOWN = 0,
+ ST_CLEAN = 1,
+ ST_DIRTY = 2,
+ ST_INIT = 3
+ };
+
+ // computeState - Given two states, compute the resulting state, in
+ // the following way
+ //
+ // 1) One dirty state yields another dirty state
+ // 2) All states must be clean for the result to be clean
+ // 3) If none above and one unknown, the result state is also unknown
+ //
+ unsigned computeState(unsigned PrevState, unsigned CurState) {
+ if (PrevState == ST_INIT)
+ return CurState;
+
+ if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
+ return ST_DIRTY;
+
+ if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
+ return ST_CLEAN;
+
+ return ST_UNKNOWN;
+ }
+
};
char VZeroUpperInserter::ID = 0;
}
@@ -49,37 +105,82 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
}
+static bool isYmmReg(unsigned Reg) {
+ if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
+ return true;
+
+ return false;
+}
+
+static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
+ E = MRI.livein_end(); I != E; ++I)
+ if (isYmmReg(I->first))
+ return true;
+
+ return false;
+}
+
+static bool hasYmmReg(MachineInstr *MI) {
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDebug())
+ continue;
+ if (isYmmReg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
- bool Changed = false;
-
- // Process any unreachable blocks in arbitrary order now.
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- Changed |= processBasicBlock(MF, *BB);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool EverMadeChange = false;
- return Changed;
-}
+ // Fast check: if the function doesn't use any ymm registers, we don't need
+ // to insert any VZEROUPPER instructions. This is constant-time, so it is
+ // cheap in the common case of no ymm use.
+ bool YMMUsed = false;
+ const TargetRegisterClass *RC = X86::VR256RegisterClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
+ i != e; i++) {
+ if (MRI.isPhysRegUsed(*i)) {
+ YMMUsed = true;
+ break;
+ }
+ }
+ if (!YMMUsed)
+ return EverMadeChange;
-static bool isCallToModuleFn(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "Isn't a call instruction");
+ // Pre-compute the existence of any live-in YMM registers to this function
+ FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ assert(BBState.empty());
+ BBState.resize(MF.getNumBlockIDs(), 0);
+ BBSolved.resize(MF.getNumBlockIDs(), 0);
- if (!MO.isGlobal())
- continue;
+ // Each BB state depends on all predecessors, so loop over until everything
+ // converges. (Once we converge, we can implicitly mark everything that is
+ // still ST_UNKNOWN as ST_CLEAN.)
+ while (1) {
+ bool MadeChange = false;
- const GlobalValue *GV = MO.getGlobal();
- GlobalValue::LinkageTypes LT = GV->getLinkage();
- if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
- (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
- return true;
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ MadeChange |= processBasicBlock(MF, *I);
- return false;
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
}
- return false;
+
+ BBState.clear();
+ BBSolved.clear();
+ return EverMadeChange;
}
/// processBasicBlock - Loop over all of the instructions in the basic block,
@@ -87,19 +188,98 @@ static bool isCallToModuleFn(const MachineInstr *MI) {
bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &BB) {
bool Changed = false;
+ unsigned BBNum = BB.getNumber();
MBB = &BB;
+ // Don't process already solved BBs
+ if (BBSolved[BBNum])
+ return false; // No changes
+
+ // Check the state of all predecessors
+ unsigned EntryState = ST_INIT;
+ for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
+ if (EntryState == ST_DIRTY)
+ break;
+ }
+
+
+ // The entry MBB for the function may set the initial state to dirty if
+ // the function receives any YMM incoming arguments
+ if (MBB == MF.begin()) {
+ EntryState = ST_CLEAN;
+ if (FnHasLiveInYmm)
+ EntryState = ST_DIRTY;
+ }
+
+ // The current state is initialized according to the predecessors
+ unsigned CurState = EntryState;
+ bool BBHasCall = false;
+
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
+
+ // Shortcut: don't need to check regular instructions in dirty state.
+ if (!isControlFlow && CurState == ST_DIRTY)
+ continue;
+
+ if (hasYmmReg(MI)) {
+ // We found a ymm-using instruction; this could be an AVX instruction,
+ // or it could be control flow.
+ CurState = ST_DIRTY;
+ continue;
+ }
- // Insert a vzeroupper instruction before each control transfer
- // to functions outside this module
- if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
- BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
- ++NumVZU;
+ // Check for control-flow out of the current function (which might
+ // indirectly execute SSE instructions).
+ if (!isControlFlow)
+ continue;
+
+ BBHasCall = true;
+
+ // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
+ // registers. This instruction has zero latency. In addition, the processor
+ // changes back to Clean state, after which execution of Intel SSE
+ // instructions or Intel AVX instructions has no transition penalty. Add
+ // the VZEROUPPER instruction before any function call/return that might
+ // execute SSE code.
+ // FIXME: In some cases, we may want to move the VZEROUPPER into a
+ // predecessor block.
+ if (CurState == ST_DIRTY) {
+ // Only insert the VZEROUPPER in case the entry state isn't unknown.
+ // When unknown, only compute the information within the block to have
+ // it available in the exit if possible, but don't change the block.
+ if (EntryState != ST_UNKNOWN) {
+ BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+
+ // After the inserted VZEROUPPER the state becomes clean again, but
+ // other YMM uses may appear before subsequent calls or even before
+ // the end of the BB.
+ CurState = ST_CLEAN;
}
}
+ DEBUG(dbgs() << "MBB #" << BBNum
+ << ", current state: " << CurState << '\n');
+
+ // A BB can only be considered solved when we both have done all the
+ // necessary transformations, and have computed the exit state. This happens
+ // in two cases:
+ // 1) We know the entry state: this immediately implies the exit state and
+ // all the necessary transformations.
+ // 2) There are no calls, and a non-call instruction marks this block:
+ // no transformations are necessary, and we know the exit state.
+ if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
+ BBSolved[BBNum] = true;
+
+ if (CurState != BBState[BBNum])
+ Changed = true;
+
+ BBState[BBNum] = CurState;
return Changed;
}
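The state-merge rules documented in the vzeroupper hunks above (any dirty predecessor makes the result dirty, the result is clean only when every input is clean, otherwise it stays unknown) form a small join over the four block states. The following standalone sketch restates that join outside the pass, purely for illustration: the enumerator names mirror the patch, but the function name, the driver, and everything else are invented for this example.

#include <cassert>

// Block states as in the patch; ST_INIT acts as the identity of the join,
// used before the first predecessor has been folded in.
enum BlockState { ST_UNKNOWN = 0, ST_CLEAN = 1, ST_DIRTY = 2, ST_INIT = 3 };

// Fold one predecessor's exit state into the accumulated entry state:
//   1) any dirty input makes the result dirty,
//   2) the result is clean only if every input seen so far is clean,
//   3) otherwise the result is unknown.
static BlockState joinStates(BlockState Prev, BlockState Cur) {
  if (Prev == ST_INIT)
    return Cur;                                   // first predecessor seen
  if (Prev == ST_DIRTY || Cur == ST_DIRTY)
    return ST_DIRTY;
  if (Prev == ST_CLEAN && Cur == ST_CLEAN)
    return ST_CLEAN;
  return ST_UNKNOWN;
}

int main() {
  assert(joinStates(ST_INIT, ST_CLEAN) == ST_CLEAN);      // identity element
  assert(joinStates(ST_CLEAN, ST_DIRTY) == ST_DIRTY);     // dirty dominates
  assert(joinStates(ST_CLEAN, ST_UNKNOWN) == ST_UNKNOWN); // mixed goes unknown
  return 0;
}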
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 3dc51e1991ed..0d59572a0d57 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS XCore.td)
-llvm_tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(XCoreGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM XCoreGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(XCoreCommonTableGen)
add_llvm_target(XCoreCodeGen
@@ -14,6 +14,7 @@ add_llvm_target(XCoreCodeGen
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
+ XCoreMachineFunctionInfo.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -21,17 +22,5 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMXCoreDesc
- LLVMXCoreInfo
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
new file mode 100644
index 000000000000..53b4a9e3f5f7
--- /dev/null
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/XCore/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = XCore
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = XCoreCodeGen
+parent = XCore
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822db7113..3a3f5b4cc63e 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreDesc
- LLVMMC
- LLVMXCoreInfo
- )
-
add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..a80c939b4372
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreDesc
+parent = XCore
+required_libraries = MC XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 42ab1b31d57a..1cfdbda003b5 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -8,8 +8,11 @@
//===----------------------------------------------------------------------===//
#include "XCoreMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
+void XCoreMCAsmInfo::anchor() { }
+
XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
SupportsDebugInformation = true;
Data16bitsDirective = "\t.short\t";
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 840392263881..076777541e33 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,14 @@
#ifndef XCORETARGETASMINFO_H
#define XCORETARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
class XCoreMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 276e841e6acc..bbfdd4356f2a 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -61,9 +62,10 @@ static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 3cfc3764a62c..a255adb2e0f2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -15,9 +15,7 @@
#define XCOREMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheXCoreTarget;
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f6904305..2c34b8730c85 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..770ba87e4a39
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreInfo
+parent = XCore
+required_libraries = MC Support Target
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0cac319b..08f091e5b870 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
class XCoreTargetMachine;
class formatted_raw_ostream;
- FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
} // end namespace llvm;
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 38401895e634..04a1dd5e95be 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -1,4 +1,4 @@
-//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
+// This is the top level entry point for the XCore target.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169819a7..50fda58cf574 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
#include "XCoreFrameLowering.h"
+#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "llvm/Function.h"
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
}
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -92,8 +93,6 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
- const XCoreRegisterInfo *RegInfo =
- static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -118,7 +117,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME could emit multiple instructions.
report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
}
- bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF);
// Do we need to allocate space on the stack?
if (FrameSize) {
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index c591e93780aa..4c51aa5e79cc 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 4dac1cee9827..7564fbad7d45 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
const XCoreSubtarget &Subtarget;
public:
- XCoreDAGToDAGISel(XCoreTargetMachine &TM)
- : SelectionDAGISel(TM),
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
@@ -83,8 +83,9 @@ namespace {
/// createXCoreISelDag - This pass converts a legalized DAG into an
/// XCore-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
- return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
@@ -120,7 +121,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
ConstantSDNode *CN = 0;
if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
&& (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0)) {
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
// Constant word offset from an object in the data region
Base = Addr.getOperand(0).getOperand(0);
Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
@@ -141,7 +142,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
ConstantSDNode *CN = 0;
if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
&& (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0)) {
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
// Constant word offset from an object in the data region
Base = Addr.getOperand(0).getOperand(0);
Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
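Both addressing-mode hunks above tighten the offset-folding condition so that only non-negative, word-aligned constant offsets are folded into the DP/CP-relative address. Stated on its own as a tiny predicate (the helper name and standalone form are assumptions made for this sketch):

#include <cassert>
#include <cstdint>

// Accept a constant word offset only when it is a non-negative multiple of 4.
static bool isFoldableWordOffset(int64_t Off) {
  return Off % 4 == 0 && Off >= 0;
}

int main() {
  assert(isFoldableWordOffset(8));     // aligned, non-negative: foldable
  assert(!isFoldableWordOffset(-4));   // negative offsets are now rejected
  assert(!isFoldableWordOffset(6));    // not word aligned
  return 0;
}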
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 2afe0e35afb1..fdf2b783241c 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
const char *XCoreTargetLowering::
@@ -109,6 +108,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -186,7 +187,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
- return SDValue();
}
}
@@ -198,7 +198,6 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- return;
case ISD::ADD:
case ISD::SUB:
Results.push_back(ExpandADDSUB(N, DAG));
@@ -274,9 +273,8 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
}
- if (! GVar) {
+ if (!GVar) {
llvm_unreachable("Thread local object not a GlobalVariable?");
- return SDValue();
}
Type *Ty = cast<PointerType>(GV->getType())->getElementType();
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
@@ -386,6 +384,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
Offset = off;
return true;
}
+ // Check for an aligned global variable.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
+ const GlobalValue *GV = GA->getGlobal();
+ if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ }
return false;
}
@@ -418,7 +425,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
//
return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
// Lower to
// ldw low, base[offset >> 2]
@@ -435,9 +442,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
- LowAddr, MachinePointerInfo(), false, false, 0);
+ LowAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
- HighAddr, MachinePointerInfo(), false, false, 0);
+ HighAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
@@ -478,8 +487,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, IntPtrTy, false, false,
- false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
Args, DAG, DL);
@@ -540,8 +549,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
- false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
Args, DAG, dl);
@@ -745,14 +754,14 @@ SDValue XCoreTargetLowering::
LowerVAARG(SDValue Op, SelectionDAG &DAG) const
{
llvm_unreachable("unimplemented");
- // FIX Arguments passed by reference need a extra dereference.
+ // FIXME Arguments passed by reference need an extra dereference.
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(1), MachinePointerInfo(V),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
DAG.getConstant(VT.getSizeInBits(),
@@ -762,7 +771,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -866,7 +875,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue
XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1137,13 +1146,13 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
if (isVarArg) {
/* Argument registers */
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -1354,8 +1363,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
SDValue Ops [] = { Carry, Result };
@@ -1377,8 +1386,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
@@ -1393,8 +1402,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
SDValue Ops [] = { Borrow, Result };
@@ -1512,21 +1521,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
}
void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case XCoreISD::LADD:
case XCoreISD::LSUB:
if (Op.getResNo() == 0) {
// Top bits of carry / borrow are clear.
- KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - 1);
- KnownZero &= Mask;
+ KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+ KnownZero.getBitWidth() - 1);
}
break;
}
@@ -1590,8 +1597,6 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// reg + reg<<2
return AM.Scale == 4 && AM.BaseOffs == 0;
}
-
- return false;
}
//===----------------------------------------------------------------------===//
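The three PerformDAGCombine hunks above switch from comparing KnownZero against the whole mask to testing only the masked bits, since ComputeMaskedBits no longer takes a mask argument and may now report known-zero bits outside the range the combine cares about. A minimal restatement of the new predicate using plain integers instead of APInt (the helper name and the fixed 32-bit width are assumptions of this sketch):

#include <cassert>
#include <cstdint>

// True when every bit selected by Mask is known to be zero, regardless of
// what the analysis reports about bits outside the mask.
static bool maskedBitsKnownZero(uint32_t KnownZero, uint32_t Mask) {
  return (KnownZero & Mask) == Mask;
}

int main() {
  const uint32_t Mask = 0xFFFFFFFEu;                 // the high 31 bits of an i32
  assert(maskedBitsKnownZero(0xFFFFFFFEu, Mask));    // exactly the masked bits
  assert(maskedBitsKnownZero(0xFFFFFFFFu, Mask));    // an extra known-zero bit is fine
  assert(!maskedBitsKnownZero(0x7FFFFFFEu, Mask));   // a masked bit is unknown
  return 0;
}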
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d6c5b329a0a0..0b63ecd0f78e 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -15,9 +15,9 @@
#ifndef XCOREISELLOWERING_H
#define XCOREISELLOWERING_H
+#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "XCore.h"
namespace llvm {
@@ -160,7 +160,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -175,9 +174,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 8002c993270c..1963a70fb30d 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index a0946a197a1a..0a3008d7ab33 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMachineFunctionInfo.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index d354802ee03f..42eeed8370f4 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef XCOREINSTRUCTIONINFO_H
#define XCOREINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 4d2e93bc7a04..b25a08d25c1a 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..7ca06729120e
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void XCoreFunctionInfo::anchor() { }
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index a575a0f69541..f869fcf26de3 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//===-- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,7 +26,7 @@ class Function;
/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private XCore target-specific information for each MachineFunction.
class XCoreFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
bool UsesLR;
int LRSpillSlot;
int FPSpillSlot;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1b78b373fffa..f3b4b4c4f88a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "XCoreRegisterInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,8 +26,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@@ -54,28 +54,14 @@ static inline bool isImmU16(unsigned val) {
return val < (1 << 16);
}
-static const unsigned XCore_ArgRegs[] = {
- XCore::R0, XCore::R1, XCore::R2, XCore::R3
-};
-
-const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
-{
- return XCore_ArgRegs;
-}
-
-unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
-{
- return array_lengthof(XCore_ArgRegs);
-}
-
bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.getMMI().hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
}
-const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
XCore::R4, XCore::R5, XCore::R6, XCore::R7,
XCore::R8, XCore::R9, XCore::R10, XCore::LR,
0
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5c28f39d8788..7391cfdf0734 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -44,7 +44,7 @@ public:
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -62,15 +62,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs(const MachineFunction *MF = 0);
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs(const MachineFunction *MF = 0);
-
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
};
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index c3542304a4ec..9edfda1f5007 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index b4e992710419..8cfb77089f31 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void XCoreSubtarget::anchor() { }
+
XCoreSubtarget::XCoreSubtarget(const std::string &TT,
const std::string &CPU, const std::string &FS)
: XCoreGenSubtargetInfo(TT, CPU, FS)
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 7b29fa236710..8d0f254e087a 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
class StringRef;
class XCoreSubtarget : public XCoreGenSubtargetInfo {
+ virtual void anchor();
public:
/// This constructor initializes the data members to match that
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index fdc5d35036bb..f65297e54a79 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "XCore.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -21,8 +22,10 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -32,9 +35,27 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
TSInfo(*this) {
}
-bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createXCoreISelDag(*this));
+namespace {
+/// XCore Code Generator Pass Configuration Options.
+class XCorePassConfig : public TargetPassConfig {
+public:
+ XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ XCoreTargetMachine &getXCoreTargetMachine() const {
+ return getTM<XCoreTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+};
+} // namespace
+
+TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new XCorePassConfig(this, PM);
+}
+
+bool XCorePassConfig::addInstSelector() {
+ PM.add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 83d09d6df49d..254668142aaf 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "XCoreFrameLowering.h"
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {
@@ -33,8 +33,9 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
@@ -55,7 +56,7 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 7424c78be305..27875e783b33 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index 10e0cc6b5691..de1353e6c12d 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -3,4 +3,5 @@ add_subdirectory(Instrumentation)
add_subdirectory(InstCombine)
add_subdirectory(Scalar)
add_subdirectory(IPO)
+add_subdirectory(Vectorize)
add_subdirectory(Hello)
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4d8dbc2189a6..58b3551cd7a2 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,13 +20,3 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
-
-add_llvm_library_dependencies(LLVMipo
- LLVMAnalysis
- LLVMCore
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- LLVMipa
- )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index c3ecb7afff7a..d8fae8a4b2b9 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -140,18 +140,24 @@ bool ConstantMerge::runOnModule(Module &M) {
UsedGlobals.count(GV))
continue;
+ // This transformation is legal for weak ODR globals in the sense it
+ // doesn't change semantics, but we really don't want to perform it
+ // anyway; it's likely to pessimize code generation, and some tools
+ // (like the Darwin linker in cases involving CFString) don't expect it.
+ if (GV->isWeakForLinker())
+ continue;
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
GlobalVariable *&Slot = CMap[Pair];
- // If this is the first constant we find or if the old on is local,
- // replace with the current one. It the current is externally visible
+ // If this is the first constant we find or if the old one is local,
+ // replace with the current one. If the current is externally visible
// it cannot be replaced, but can be the canonical constant we merge with.
- if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot))
Slot = GV;
- }
}
// Second: identify all globals that can be merged together, filling in
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4bb6f7a90e93..95aef272211e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -74,7 +74,7 @@ namespace {
std::string getDescription() const {
return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getNameStr();
+ + utostr(Idx) + " of function " + F->getName().str();
}
};
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 0edf3427507b..f3f622843340 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
@@ -225,31 +226,247 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+namespace {
+ // For a given pointer Argument, this retains a list of Arguments of functions
+ // in the same SCC that the pointer data flows into. We use this to build an
+ // SCC of the arguments.
+ struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode*, 4> Uses;
+ };
+
+ class ArgumentGraph {
+ // We store pointers to ArgumentGraphNode objects, so it's important
+ // that they not move around upon insert.
+ typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+ public:
+ ArgumentGraph() { SyntheticRoot.Definition = 0; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+ };
+
+ // This tracker checks whether callees are in the SCC, and if so it does not
+ // consider that a capture, instead adding it to the "Uses" list and
+ // continuing with the analysis.
+ struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) { return true; }
+
+ bool captured(Use *U) {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) { Captured = true; return true; }
+
+ Function *F = CS.getCalledFunction();
+ if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
+
+ Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
+ PI != PE; ++PI, ++AI) {
+ if (AI == AE) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
+ }
+ if (PI == U) {
+ Uses.push_back(AI);
+ break;
+ }
+ }
+ assert(!Uses.empty() && "Capturing call-site captured nothing?");
+ return false;
+ }
+
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+
+ const SmallPtrSet<Function*, 8> &SCCNodes;
+ };
+}
+
+namespace llvm {
+ template<> struct GraphTraits<ArgumentGraphNode*> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+ };
+ template<> struct GraphTraits<ArgumentGraph*>
+ : public GraphTraits<ArgumentGraphNode*> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) {
+ return AG->end();
+ }
+ };
+}
+
/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
bool Changed = false;
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F && !F->isDeclaration() && !F->mayBeOverridden())
+ SCCNodes.insert(F);
+ }
+
+ ArgumentGraph AG;
+
// Check each function in turn, determining which pointer arguments are not
// captured.
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
if (F == 0)
- // External node - skip it;
+ // External node - only a problem for arguments that we pass to it.
continue;
// Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
+ // something that captures pointers, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden())
continue;
+ // Functions that are readonly (or readnone) and nounwind and don't return
+ // a value can't capture arguments. Don't analyze them.
+ if (F->onlyReadsMemory() && F->doesNotThrow() &&
+ F->getReturnType()->isVoidTy()) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
- if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
- !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
- A->addAttr(Attribute::NoCapture);
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ ArgumentUsesTracker Tracker(SCCNodes);
+ PointerMayBeCaptured(A, &Tracker);
+ if (!Tracker.Captured) {
+ if (Tracker.Uses.empty()) {
+ // If it's trivially not captured, mark it nocapture now.
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ } else {
+ // If it's not trivially captured and not trivially not captured,
+ // then it must be calling into another function in our SCC. Save
+ // its particulars for Argument-SCC analysis later.
+ ArgumentGraphNode *Node = AG[A];
+ for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end(); UI != UE; ++UI)
+ Node->Uses.push_back(AG[*UI]);
+ }
+ }
+ // Otherwise, it's captured. Don't bother doing SCC analysis on it.
+ }
+ }
+
+ // The graph we've collected is partial because we stopped scanning for
+ // argument uses once we solved the argument trivially. These partial nodes
+ // show up as ArgumentGraphNode objects with an empty Uses list, and for
+ // these nodes the final decision about whether they capture has already been
+ // made. If the definition doesn't have a 'nocapture' attribute by now, it
+ // captures.
+
+ for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG);
+ I != E; ++I) {
+ std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+ if (ArgumentSCC.size() == 1) {
+ if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+
+ // e.g. "void f(int* x) { if (...) f(x); }"
+ if (ArgumentSCC[0]->Uses.size() == 1 &&
+ ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
+ ArgumentSCC[0]->Definition->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
+ continue;
+ }
+
+ bool SCCCaptured = false;
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *Node = *I;
+ if (Node->Uses.empty()) {
+ if (!Node->Definition->hasNoCaptureAttr())
+ SCCCaptured = true;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
+ // quickly looking up whether a given Argument is in this ArgumentSCC.
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E; ++I) {
+ ArgumentSCCNodes.insert((*I)->Definition);
+ }
+
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *N = *I;
+ for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end(); UI != UE; ++UI) {
+ Argument *A = (*UI)->Definition;
+ if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
+ continue;
+ SCCCaptured = true;
+ break;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
}
return Changed;
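As a concrete, hypothetical input for the argument-SCC logic added above: in the pair of mutually recursive functions below, neither pointer parameter can be proven nocapture by examining one function in isolation, because each passes the pointer to the other. Once both functions sit in the same SCC and the pointer is seen only to travel around that cycle and finally be loaded from, both parameters can be marked nocapture together (the code is illustrative C++ standing in for the IR the pass would actually see):

// Hypothetical source; neither function stores or otherwise escapes p.
int f(int *p, unsigned n);

int g(int *p, unsigned n) {
  return n == 0 ? *p : f(p, n - 1);   // p only flows into f's parameter
}

int f(int *p, unsigned n) {
  return n == 0 ? *p : g(p, n - 1);   // p only flows into g's parameter
}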
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3552d03919ba..1522aa408b6b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@ namespace {
struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -80,11 +82,17 @@ namespace {
const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+
+ TargetData *TD;
+ TargetLibraryInfo *TLI;
};
}
char GlobalOpt::ID = 0;
-INITIALIZE_PASS(GlobalOpt, "globalopt",
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -143,18 +151,31 @@ struct GlobalStatus {
/// HasPHIUser - Set to true if this global has a user that is a PHI node.
bool HasPHIUser;
+ /// AtomicOrdering - Set to the strongest atomic ordering requirement.
+ AtomicOrdering Ordering;
+
GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
StoredOnceValue(0), AccessingFunction(0),
- HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
- HasPHIUser(false) {}
+ HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false), HasPHIUser(false),
+ Ordering(NotAtomic) {}
};
}
-// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-// by constants itself. Note that constants cannot be cyclic, so this test is
-// pretty easy to implement recursively.
-//
+/// StrongerOrdering - Return the stronger of the two orderings. If the two
+/// orderings are acquire and release, then return AcquireRelease.
+///
+static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release) return AcquireRelease;
+ if (Y == Acquire && X == Release) return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
+
+/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+/// by other constants. Note that constants cannot be cyclic, so this test is
+/// pretty easy to implement recursively.
+///
static bool SafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C)) return false;
@@ -195,14 +216,16 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
}
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.isLoaded = true;
- // Don't hack on volatile/atomic loads.
- if (!LI->isSimple()) return true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile()) return true;
+ GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
} else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V) return true;
- // Don't hack on volatile/atomic stores.
- if (!SI->isSimple()) return true;
+ // Don't hack on volatile stores.
+ if (SI->isVolatile()) return true;
+ GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
// value, not an aggregate), keep more specific information about
@@ -271,43 +294,12 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
- ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
- if (!CI) return 0;
- unsigned IdxV = CI->getZExtValue();
-
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
- if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
- } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
- if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
- if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
- } else if (isa<ConstantAggregateZero>(Agg)) {
- if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
- if (IdxV < STy->getNumElements())
- return Constant::getNullValue(STy->getElementType(IdxV));
- } else if (SequentialType *STy =
- dyn_cast<SequentialType>(Agg->getType())) {
- return Constant::getNullValue(STy->getElementType());
- }
- } else if (isa<UndefValue>(Agg)) {
- if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
- if (IdxV < STy->getNumElements())
- return UndefValue::get(STy->getElementType(IdxV));
- } else if (SequentialType *STy =
- dyn_cast<SequentialType>(Agg->getType())) {
- return UndefValue::get(STy->getElementType());
- }
- }
- return 0;
-}
-
-
/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+ TargetData *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -328,11 +320,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
} else if (CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0);
+ Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
}
if (CE->use_empty()) {
@@ -346,11 +338,17 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+ // If the initializer is an all-null value and we have an inbounds GEP,
+ // we already know what the result of any load from that GEP is.
+ // TODO: Handle splats.
+ if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+ SubInit = Constant::getNullValue(GEP->getType()->getElementType());
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -368,7 +366,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init);
+ CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
}
}
@@ -514,8 +512,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
+ Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
@@ -547,8 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
+ Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
@@ -770,7 +766,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
 /// reachable with a null value, so we can optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -813,7 +811,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
// nor is the global.
if (AllNonStoreUsesGone) {
DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0);
+ CleanupConstantGlobalUsers(GV, 0, TD, TLI);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -825,10 +823,11 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V) {
+static void ConstantPropUsersOf(Value *V,
+ TargetData *TD, TargetLibraryInfo *TLI) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I)) {
+ if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -848,7 +847,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
ConstantInt *NElements,
- TargetData* TD) {
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -906,7 +906,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
while (!GV->use_empty()) {
if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
SI->eraseFromParent();
continue;
}
@@ -921,7 +922,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ // Sink the load to where the compare was, if atomic rules allow us to.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ LI->getOrdering(), LI->getSynchScope(),
+ LI->isUnordered() ? (Instruction*)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
@@ -962,9 +966,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV);
+ ConstantPropUsersOf(NewGV, TD, TLI);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue);
+ ConstantPropUsersOf(RepValue, TD, TLI);
return NewGV;
}
@@ -1203,7 +1207,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
} else {
llvm_unreachable("Unknown usable value");
- Result = 0;
}
return FieldVals[FieldNo] = Result;
@@ -1293,9 +1296,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value* NElems, TargetData *TD) {
+ Value *NElems, TargetData *TD) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
- Type* MAT = getMallocAllocatedType(CI);
+ Type *MAT = getMallocAllocatedType(CI);
StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
@@ -1482,8 +1485,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
+ AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD) {
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
if (!TD)
return false;
@@ -1502,7 +1507,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded setcc'd, and
+ // malloc to be stored into the specified global, loaded icmp'd, and
// GEP'd. These are all things we could transform to using the global
// for.
SmallPtrSet<const PHINode*, 8> PHIs;
@@ -1523,7 +1528,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
return true;
}
@@ -1531,6 +1536,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
+ if (Ordering != NotAtomic)
+ return false;
+
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
@@ -1563,7 +1571,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
return true;
}
@@ -1573,8 +1581,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD) {
+ TargetData *TD, TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1589,12 +1598,13 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- Type* MallocType = getMallocAllocatedType(CI);
- if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
- GVI, TD))
+ Type *MallocType = getMallocAllocatedType(CI);
+ if (MallocType &&
+ TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
+ TD, TLI))
return true;
}
}
@@ -1670,7 +1680,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
- StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
@@ -1678,11 +1689,13 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
- new StoreInst(StoreVal, NewGV, SI);
+ new StoreInst(StoreVal, NewGV, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
Value *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1699,8 +1712,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
-/// ProcessInternalGlobal - Analyze the specified global variable and optimize
-/// it if possible. If we make a change, return true.
+/// ProcessGlobal - Analyze the specified global variable and optimize it if
+/// possible. If we make a change, return true.
bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
Module::global_iterator &GVI) {
if (!GV->hasLocalLinkage())
@@ -1737,7 +1750,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS) {
// If this is a first class global and has only one accessing function
   // and this function is main (which we know is not recursive), we can make
@@ -1755,11 +1768,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GS.AccessingFunction->hasExternalLinkage() &&
GV->getType()->getAddressSpace() == 0) {
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
- Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
- Type* ElemTy = GV->getType()->getElementType();
+ Type *ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1776,7 +1789,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
+ TD, TLI);
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1791,7 +1805,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1820,7 +1834,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
if (GV->use_empty()) {
DEBUG(dbgs() << " *** Substituting initializer allowed us to "
@@ -1836,8 +1850,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>()))
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
+ TD, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1890,7 +1904,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
if (!F->hasName() && !F->isDeclaration())
F->setLinkage(GlobalValue::InternalLinkage);
F->removeDeadConstantUsers();
- if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ if (F->isDefTriviallyDead()) {
F->eraseFromParent();
Changed = true;
++NumFnDeleted;
@@ -1930,8 +1944,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2052,16 +2065,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
-static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
- if (Constant *CV = dyn_cast<Constant>(V)) return CV;
- Constant *R = ComputedValues[V];
- assert(R && "Reference to an uncomputed value!");
- return R;
-}
-
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants);
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
@@ -2073,7 +2080,8 @@ isSimpleEnoughValueToCommit(Constant *C,
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// Simple integer, undef, constant aggregate zero, global addresses, etc are
// all supported.
if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
@@ -2085,7 +2093,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
isa<ConstantVector>(C)) {
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
Constant *Op = cast<Constant>(C->getOperand(i));
- if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
return false;
}
return true;
@@ -2097,34 +2105,42 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
ConstantExpr *CE = cast<ConstantExpr>(C);
switch (CE->getOpcode()) {
case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+
case Instruction::IntToPtr:
case Instruction::PtrToInt:
- // These casts are always fine if the casted value is.
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
+ TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
// GEP is fine if it is simple + constant offset.
case Instruction::GetElementPtr:
for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
if (!isa<ConstantInt>(CE->getOperand(i)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
case Instruction::Add:
// We allow simple+cst.
if (!isa<ConstantInt>(CE->getOperand(1)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
}
return false;
}
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C)) return true;
// Check the constant.
- return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
}
@@ -2191,23 +2207,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return Val;
}
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 32> Elts;
if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
-
// Break up the constant into its elements.
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (isa<ConstantAggregateZero>(Init)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
- } else if (isa<UndefValue>(Init)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Elts.push_back(UndefValue::get(STy->getElementType(i)));
- } else {
- llvm_unreachable("This code is out of sync with "
- " ConstantFoldLoadThroughGEPConstantExpr");
- }
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
// Replace the element that we are supposed to.
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
@@ -2226,22 +2230,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
NumElts = ATy->getNumElements();
else
- NumElts = cast<VectorType>(InitTy)->getNumElements();
+ NumElts = InitTy->getVectorNumElements();
// Break up the array into elements.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
- for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (isa<ConstantAggregateZero>(Init)) {
- Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
- } else {
- assert(isa<UndefValue>(Init) && "This code is out of sync with "
- " ConstantFoldLoadThroughGEPConstantExpr");
- Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
- }
+ for (uint64_t i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
@@ -2266,15 +2259,109 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}
+namespace {
+
+/// Evaluator - This class evaluates LLVM IR, producing the Constant
+/// representing each SSA instruction. Changes to global variables are stored
+/// in a mapping that can be iterated over after the evaluation is complete.
+/// Once an evaluation call fails, the evaluation object should not be reused.
+class Evaluator {
+public:
+ Evaluator(const TargetData *TD, const TargetLibraryInfo *TLI)
+ : TD(TD), TLI(TLI) {
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ }
+
+ ~Evaluator() {
+ DeleteContainerPointers(ValueStack);
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+ }
+
+ /// EvaluateFunction - Evaluate a call to function F, returning true if
+  /// successful, false if we can't evaluate it. ActualArgs contains the
+  /// actual arguments for the function.
+ bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs);
+
+ /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+  /// successful, false if we can't evaluate it. NextBB returns the next BB that
+ /// control flows into, or null upon return.
+ bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
+
+ Constant *getVal(Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ValueStack.back()->lookup(V);
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+ }
+
+ void setVal(Value *V, Constant *C) {
+ ValueStack.back()->operator[](V) = C;
+ }
+
+ const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
+ return MutatedMemory;
+ }
+
+ const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
+ return Invariants;
+ }
+
+private:
+ Constant *ComputeLoadResult(Constant *P);
+
+ /// ValueStack - As we compute SSA register values, we store their contents
+ /// here. The back of the vector contains the current function and the stack
+ /// contains the values in the calling frames.
+ SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ SmallVector<Function*, 4> CallStack;
+
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ SmallVector<GlobalVariable*, 32> AllocaTmps;
+
+ /// Invariants - These global variables have been marked invariant by the
+ /// static constructor.
+ SmallPtrSet<GlobalVariable*, 8> Invariants;
+
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+};
+
+} // anonymous namespace
+
/// ComputeLoadResult - Return the value that would be computed by a load from
/// P after the stores reflected by 'memory' have been performed. If we can't
/// decide, return null.
-static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory) {
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
- if (I != Memory.end()) return I->second;
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
@@ -2295,56 +2382,29 @@ static Constant *ComputeLoadResult(Constant *P,
return 0; // don't know how to evaluate.
}
-/// EvaluateFunction - Evaluate a call to function F, returning true if
-/// successful, false if we can't evaluate it. ActualArgs contains the formal
-/// arguments for the function.
-static bool EvaluateFunction(Function *F, Constant *&RetVal,
- const SmallVectorImpl<Constant*> &ActualArgs,
- std::vector<Function*> &CallStack,
- DenseMap<Constant*, Constant*> &MutatedMemory,
- std::vector<GlobalVariable*> &AllocaTmps,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
- // Check to see if this function is already executing (recursion). If so,
- // bail out. TODO: we might want to accept limited recursion.
- if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
- return false;
-
- CallStack.push_back(F);
-
- /// Values - As we compute SSA register values, we store their contents here.
- DenseMap<Value*, Constant*> Values;
-
- // Initialize arguments to the incoming values specified.
- unsigned ArgNo = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
- ++AI, ++ArgNo)
- Values[AI] = ActualArgs[ArgNo];
-
- /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
- /// we can only evaluate any one basic block at most once. This set keeps
- /// track of what we have executed so we can detect recursive cases etc.
- SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
- // CurInst - The current instruction we're evaluating.
- BasicBlock::iterator CurInst = F->begin()->begin();
-
+/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+/// successful, false if we can't evaluate it. NextBB returns the next BB that
+/// control flows into, or null upon return.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
// This is the main evaluation loop.
while (1) {
Constant *InstResult = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (!SI->isSimple()) return false; // no volatile/atomic accesses.
- Constant *Ptr = getVal(Values, SI->getOperand(1));
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
- Constant *Val = getVal(Values, SI->getOperand(0));
+ Constant *Val = getVal(SI->getOperand(0));
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
return false;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
@@ -2354,7 +2414,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// stored value.
Ptr = CE->getOperand(0);
- Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
// In order to push the bitcast onto the stored value, a bitcast
// from NewTy to Val's type must be legal. If it's not, we can try
@@ -2366,16 +2426,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StructType *STy = dyn_cast<StructType>(NewTy)) {
NewTy = STy->getTypeAtIndex(0U);
- IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
-
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
} else {
- return 0;
+ return false;
}
}
@@ -2387,33 +2449,35 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
- getVal(Values, BO->getOperand(0)),
- getVal(Values, BO->getOperand(1)));
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
} else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
InstResult = ConstantExpr::getCompare(CI->getPredicate(),
- getVal(Values, CI->getOperand(0)),
- getVal(Values, CI->getOperand(1)));
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
} else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
InstResult = ConstantExpr::getCast(CI->getOpcode(),
- getVal(Values, CI->getOperand(0)),
+ getVal(CI->getOperand(0)),
CI->getType());
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
- getVal(Values, SI->getOperand(1)),
- getVal(Values, SI->getOperand(2)));
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
- Constant *P = getVal(Values, GEP->getOperand(0));
+ Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
i != e; ++i)
- GEPOps.push_back(getVal(Values, *i));
+ GEPOps.push_back(getVal(*i));
InstResult =
ConstantExpr::getGetElementPtr(P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (!LI->isSimple()) return false; // no volatile/atomic accesses.
- InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory);
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ InstResult = ComputeLoadResult(Ptr);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
@@ -2423,25 +2487,53 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
UndefValue::get(Ty),
AI->getName()));
InstResult = AllocaTmps.back();
- } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(CurInst);
// Debug info can safely be ignored here.
- if (isa<DbgInfoIntrinsic>(CI)) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
++CurInst;
continue;
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CI->getCalledValue())) return false;
-
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) {
- if (MSI->isVolatile()) return false;
- Constant *Ptr = getVal(Values, MSI->getDest());
- Constant *Val = getVal(Values, MSI->getValue());
- Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr),
- MutatedMemory);
- if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
- // This memset is a no-op.
+ if (isa<InlineAsm>(CS.getCalledValue())) return false;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) return false;
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+        // We don't insert an entry into the value map, as invariant_start
+        // doesn't have a meaningful return value.
+ if (!II->use_empty())
+ return false;
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
+ if (!Size->isAllOnesValue() &&
+ Size->getValue().getLimitedValue() >=
+ TD->getTypeStoreSize(ElemTy))
+ Invariants.insert(GV);
+ }
+ // Continue even if we do nothing.
++CurInst;
continue;
}
@@ -2449,19 +2541,17 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
}
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(Values,
- CI->getCalledValue()));
- if (!Callee) return false; // Cannot resolve.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->mayBeOverridden())
+ return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
- CallSite CS(CI);
- for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i)
- Formals.push_back(getVal(Values, *i));
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals)) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
} else {
return false;
@@ -2472,62 +2562,43 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *RetVal;
// Execute the call, if successful, use the return value.
- if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ if (!EvaluateFunction(Callee, RetVal, Formals))
return false;
+ delete ValueStack.pop_back_val();
InstResult = RetVal;
}
} else if (isa<TerminatorInst>(CurInst)) {
- BasicBlock *NewBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
if (BI->isUnconditional()) {
- NewBB = BI->getSuccessor(0);
+ NextBB = BI->getSuccessor(0);
} else {
ConstantInt *Cond =
- dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
if (!Cond) return false; // Cannot determine.
- NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
ConstantInt *Val =
- dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
if (!Val) return false; // Cannot determine.
- NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
- Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
- NewBB = BA->getBasicBlock();
+ NextBB = BA->getBasicBlock();
else
return false; // Cannot determine.
- } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
- if (RI->getNumOperands())
- RetVal = getVal(Values, RI->getOperand(0));
-
- CallStack.pop_back(); // return from fn.
- return true; // We succeeded at evaluating this ctor!
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = 0;
} else {
// invoke, unwind, resume, unreachable.
return false; // Cannot handle this terminator.
}
- // Okay, we succeeded in evaluating this control flow. See if we have
- // executed the new block before. If so, we have a looping function,
- // which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NewBB))
- return false; // looped!
-
- // Okay, we have never been in this block before. Check to see if there
- // are any PHI nodes. If so, evaluate them with information about where
- // we came from.
- BasicBlock *OldBB = CurInst->getParent();
- CurInst = NewBB->begin();
- PHINode *PN;
- for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
- Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
-
- // Do NOT increment CurInst. We know that the terminator had no value.
- continue;
+ // We succeeded at evaluating this block!
+ return true;
} else {
// Did not know how to evaluate this!
return false;
@@ -2535,9 +2606,15 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD);
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
- Values[CurInst] = InstResult;
+ setVal(CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ return true;
}
// Advance program counter.
@@ -2545,64 +2622,96 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
}
}
-/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
-/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
- /// MutatedMemory - For each store we execute, we update this map. Loads
- /// check this to get the most up-to-date value. If evaluation is successful,
- /// this state is committed to the process.
- DenseMap<Constant*, Constant*> MutatedMemory;
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the
+/// actual arguments for the function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
- /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
- /// to represent its body. This vector is needed so we can delete the
- /// temporary globals when we are done.
- std::vector<GlobalVariable*> AllocaTmps;
+ CallStack.push_back(F);
- /// CallStack - This is used to detect recursion. In pathological situations
- /// we could hit exponential behavior, but at least there is nothing
- /// unbounded.
- std::vector<Function*> CallStack;
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(AI, ActualArgs[ArgNo]);
- /// SimpleConstants - These are constants we have checked and know to be
- /// simple enough to live in a static initializer of a global.
- SmallPtrSet<Constant*, 8> SimpleConstants;
-
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = F->begin();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (NextBB == 0) {
+ // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = 0;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
+/// we can. Return true if we can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Call the function.
+ Evaluator Eval(TD, TLI);
Constant *RetValDummy;
- bool EvalSuccess = EvaluateFunction(F, RetValDummy,
- SmallVector<Constant*, 0>(), CallStack,
- MutatedMemory, AllocaTmps,
- SimpleConstants, TD);
+ bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>());
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
- << F->getName() << "' to " << MutatedMemory.size()
+ << F->getName() << "' to " << Eval.getMutatedMemory().size()
<< " stores.\n");
- for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
- E = MutatedMemory.end(); I != E; ++I)
+ for (DenseMap<Constant*, Constant*>::const_iterator I =
+ Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
+ I != E; ++I)
CommitValueTo(I->second, I->first);
- }
-
- // At this point, we are done interpreting. If we created any 'alloca'
- // temporaries, release them now.
- while (!AllocaTmps.empty()) {
- GlobalVariable *Tmp = AllocaTmps.back();
- AllocaTmps.pop_back();
-
- // If there are still users of the alloca, the program is doing something
- // silly, e.g. storing the address of the alloca somewhere and using it
- // later. Since this is undefined, we'll just make it be null.
- if (!Tmp->use_empty())
- Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
- delete Tmp;
+ for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
+ Eval.getInvariants().begin(), E = Eval.getInvariants().end();
+ I != E; ++I)
+ (*I)->setConstant(true);
}
return EvalSuccess;
}
-
-
/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
@@ -2610,7 +2719,6 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
bool MadeChange = false;
if (Ctors.empty()) return false;
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2628,7 +2736,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD)) {
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
@@ -2700,12 +2808,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
return Changed;
}
-static Function *FindCXAAtExit(Module &M) {
- Function *Fn = M.getFunction("__cxa_atexit");
+static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::cxa_atexit))
+ return 0;
+
+ Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
if (!Fn)
return 0;
-
+
FunctionType *FTy = Fn->getFunctionType();
// Checking that the function has the right return type, the right number of
@@ -2724,7 +2835,8 @@ static Function *FindCXAAtExit(Module &M) {
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
-/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor.
+/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
+/// other side-effect free instructions.
static bool cxxDtorIsEmpty(const Function &Fn,
SmallPtrSet<const Function *, 8> &CalledFunctions) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
@@ -2757,9 +2869,9 @@ static bool cxxDtorIsEmpty(const Function &Fn,
if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
return false;
} else if (isa<ReturnInst>(*I))
- return true;
- else
- return false;
+ return true; // We're done.
+ else if (I->mayHaveSideEffects())
+ return false; // Destructor with side effects, bail.
}
return false;
@@ -2815,10 +2927,13 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
- Function *CXAAtExitFn = FindCXAAtExit(M);
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
bool LocalChange = true;
while (LocalChange) {
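
An illustrative aside (not part of the commit): the atomic-ordering plumbing added to GlobalOpt above hinges on the StrongerOrdering merge rule, which has one non-obvious case. Acquire and Release are incomparable, so their merge must be AcquireRelease rather than whichever enumerator is numerically larger; every other pair is resolved by taking the maximum. The sketch below uses a stand-in enum and only assumes that the ranking rises from NotAtomic to SequentiallyConsistent, as the max() in the patch relies on.

#include <algorithm>
#include <cassert>

// Stand-in for llvm::AtomicOrdering; only the relative ranking matters here.
enum Ordering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                AcquireRelease, SequentiallyConsistent };

static Ordering stronger(Ordering X, Ordering Y) {
  // Acquire and Release are incomparable: the merge needs both halves.
  if ((X == Acquire && Y == Release) || (X == Release && Y == Acquire))
    return AcquireRelease;
  // Everything else is totally ordered by strength, so max() is the answer.
  return std::max(X, Y);
}

int main() {
  assert(stronger(NotAtomic, Monotonic) == Monotonic);
  assert(stronger(Acquire, Release) == AcquireRelease);   // not just Release
  assert(stronger(Release, SequentiallyConsistent) == SequentiallyConsistent);
  return 0;
}
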
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index c0426da2c687..664ddf6f7a2b 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,34 +32,21 @@ namespace {
 // AlwaysInliner only inlines functions that are marked as "always inline".
class AlwaysInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
- InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
- static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
+ AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000,
+ InsertLifetime) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
+ static char ID; // Pass identification, replacement for typeid
+ virtual InlineCost getInlineCost(CallSite CS);
virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, &NeverInline);
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
@@ -72,17 +59,74 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
-// doInitialization - Initializes the vector of functions that have not
-// been annotated with the "always inline" attribute.
-bool AlwaysInliner::doInitialization(CallGraph &CG) {
- CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+ return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Minimal filter to detect invalid constructs for inlining.
+static bool isInlineViable(Function &F) {
+ bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
- Module &M = CG.getModule();
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
- for (Module::iterator I = M.begin(), E = M.end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
- NeverInline.insert(I);
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+ // We assume indirect calls aren't calling an always-inline function.
+ if (!Callee) return InlineCost::getNever();
+
+ // We can't inline calls to external functions.
+ // FIXME: We shouldn't even get here.
+ if (Callee->isDeclaration()) return InlineCost::getNever();
+
+ // Return never for anything not marked as always inline.
+ if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever();
+
+ // Do some minimal analysis to preclude non-viable functions.
+ if (!isInlineViable(*Callee))
+ return InlineCost::getNever();
+
+ // Otherwise, force inlining.
+ return InlineCost::getAlways();
+}
+
+// doInitialization - With the per-call-site cost model above there is no
+// per-module state to set up for the always inliner.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
return false;
}
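
An illustrative aside (not part of the commit): the rewritten AlwaysInliner no longer keeps a NeverInline set; its getInlineCost answers "never" unless the callee is a definition in this module, carries the alwaysinline attribute, and passes the viability screen, and "always" otherwise. A minimal standalone restatement of that decision chain follows; the struct and field names are invented stand-ins for the real CallSite and Function queries.

#include <cstdio>

// Illustrative stand-ins for the queries made on a call site and its callee.
struct CallSiteInfo {
  bool HasDirectCallee;       // a resolvable, direct callee exists
  bool CalleeIsDeclaration;   // body not available in this module
  bool CalleeAlwaysInline;    // marked with the alwaysinline attribute
  bool CalleeIsViable;        // no indirect branches, recursion, or
                              // unannotated returns-twice calls
};

enum Decision { Never, Always };

static Decision alwaysInlinerDecision(const CallSiteInfo &CS) {
  if (!CS.HasDirectCallee)    return Never; // indirect call
  if (CS.CalleeIsDeclaration) return Never; // nothing to inline
  if (!CS.CalleeAlwaysInline) return Never; // not forced
  if (!CS.CalleeIsViable)     return Never; // unsupported construct inside
  return Always;                            // otherwise, force inlining
}

int main() {
  CallSiteInfo CS = {true, false, true, true};
  std::printf("%s\n", alwaysInlinerDecision(CS) == Always ? "always" : "never");
  return 0;
}
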
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 84dd4fdd9887..50038d81161b 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -23,40 +23,26 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
class SimpleInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
SimpleInliner() : Inliner(ID) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
- SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold,
+ /*InsertLifetime*/true) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
+ return CA.getInlineCost(CS, getInlineThreshold(CS));
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
@@ -77,44 +63,6 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
- Module &M = CG.getModule();
-
- for (Module::iterator I = M.begin(), E = M.end();
- I != E; ++I)
- if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
- NeverInline.insert(I);
-
- // Get llvm.noinline
- GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
- if (GV == 0)
- return false;
-
- // Don't crash on invalid code
- if (!GV->hasDefinitiveInitializer())
- return false;
-
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
- if (InitList == 0)
- return false;
-
- // Iterate over each element and add to the NeverInline set
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
- // Get Source
- const Constant *Elt = InitList->getOperand(i);
-
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
- if (CE->getOpcode() == Instruction::BitCast)
- Elt = CE->getOperand(0);
-
- // Insert into set of functions to never inline
- if (const Function *F = dyn_cast<Function>(Elt))
- NeverInline.insert(F);
- }
-
return false;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index f00935b08887..dc9cbfb05e29 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -36,6 +36,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+// This weirdly named statistic tracks the number of times that, when attempting
+// to inline a function A into B, we analyze the callers of B to see whether
+// inlining into them would be more profitable and would be blocked by this step.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -48,11 +53,12 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
const int OptSizeThreshold = 75;
Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
-Inliner::Inliner(char &ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
- InlineLimit : Threshold) {}
+ InlineLimit : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@@ -75,13 +81,13 @@ InlinedArrayAllocasTy;
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI))
+ if (!InlineFunction(CS, IFI, InsertLifetime))
return false;
// If the inlined function had a higher stack protection level than the
@@ -230,29 +236,37 @@ bool Inliner::shouldInline(CallSite CS) {
return false;
}
- int Cost = IC.getValue();
Function *Caller = CS.getCaller();
- int CurrentThreshold = getInlineThreshold(CS);
- float FudgeFactor = getInlineFudgeFactor(CS);
- int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
- if (Cost >= AdjThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
return false;
}
- // Try to detect the case where the current inlining candidate caller
- // (call it B) is a static function and is an inlining candidate elsewhere,
- // and the current candidate callee (call it C) is large enough that
- // inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B
- // into its callers.
- if (Caller->hasLocalLinkage()) {
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ if (Caller->hasLocalLinkage() ||
+ Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
int TotalSecondaryCost = 0;
- bool outerCallsFound = false;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
// This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = true;
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
@@ -268,26 +282,20 @@ bool Inliner::shouldInline(CallSite CS) {
}
InlineCost IC2 = getInlineCost(CS2);
- if (IC2.isNever())
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
callerWillBeRemoved = false;
- if (IC2.isAlways() || IC2.isNever())
+ continue;
+ }
+ if (IC2.isAlways())
continue;
- outerCallsFound = true;
- int Cost2 = IC2.getValue();
- int CurrentThreshold2 = getInlineThreshold(CS2);
- float FudgeFactor2 = getInlineFudgeFactor(CS2);
-
- if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- callerWillBeRemoved = false;
-
- // See if we have this case. We subtract off the penalty
- // for the call instruction, which we would be deleting.
- if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
- Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
- (int)(CurrentThreshold2 * FudgeFactor2)) {
+ // See if inlining of the original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += Cost2;
+ TotalSecondaryCost += IC2.getCost();
}
}
// If all outer calls to Caller would get inlined, the cost for the last
@@ -297,17 +305,16 @@ bool Inliner::shouldInline(CallSite CS) {
if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && inliningPreventsSomeOuterInline &&
- TotalSecondaryCost < Cost) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << Cost <<
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
return false;
}
}
- DEBUG(dbgs() << " Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
return true;
}
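A worked numeric example, not from the diff, of the outer-call check above. The numbers are made up, and the value 25 is an assumption about InlineConstants::CallPenalty in this revision.

// Hypothetical numbers only, to make the arithmetic above concrete.
static bool illustrateOuterCheck() {
  int Cost = 200;                        // IC.getCost() for inlining C into B
  int CallPenalty = 25;                  // assumed InlineConstants::CallPenalty
  int CandidateCost = Cost - (CallPenalty + 1);   // 174
  int OuterCostDelta = 150;              // IC2.getCostDelta() at one caller of B
  // 150 <= 174: inlining C into B would eat the slack that currently lets B be
  // inlined at that outer site, so IC2.getCost() is added to TotalSecondaryCost.
  // If that sum stays below Cost, shouldInline() returns false and C stays out
  // of B, keeping B small enough to be inlined into its own callers.
  return OuterCostDelta <= CandidateCost;
}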
@@ -326,7 +333,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
-
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -415,8 +421,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
- // Update the cached cost info with the missing call
- growCachedCostInfo(Caller, NULL);
} else {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
@@ -439,7 +443,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID))
+ InlineHistoryID, InsertLifetime))
continue;
++NumInlined;
@@ -457,9 +461,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
}
}
-
- // Update the cached cost info with the inlined call.
- growCachedCostInfo(Caller, Callee);
}
// If we inlined or deleted the last possible call site to the function,
@@ -479,8 +480,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Remove any call graph edges from the callee to its callees.
CalleeNode->removeAllCalledFunctions();
- resetCachedCostInfo(Callee);
-
// Removing the node for callee from the call graph and delete it.
delete CG.removeFunctionFromModule(CalleeNode);
++NumDeleted;
@@ -514,29 +513,28 @@ bool Inliner::doFinalization(CallGraph &CG) {
/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR) {
- SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
CallGraphNode *CGN = I->second;
- if (CGN->getFunction() == 0)
- continue;
-
Function *F = CGN->getFunction();
-
+ if (!F || F->isDeclaration())
+ continue;
+
+ // Handle the case when this function is called and we only want to care
+ // about always-inline functions. This is a bit of a hack to share code
+ // between here and the InlineAlways pass.
+ if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ continue;
+
// If the only remaining users of the function are dead constants, remove
// them.
F->removeDeadConstantUsers();
- if (DNR && DNR->count(F))
- continue;
- if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
- !F->hasAvailableExternallyLinkage())
- continue;
- if (!F->use_empty())
+ if (!F->isDefTriviallyDead())
continue;
// Remove any call graph edges from the function to its callees.
@@ -548,24 +546,27 @@ bool Inliner::removeDeadFunctions(CallGraph &CG,
CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
// Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.insert(CGN);
+ FunctionsToRemove.push_back(CGN);
}
+ if (FunctionsToRemove.empty())
+ return false;
// Now that we know which functions to delete, do so. We didn't want to do
// this inline, because that would invalidate our CallGraph::iterator
// objects. :(
//
- // Note that it doesn't matter that we are iterating over a non-stable set
+ // Note that it doesn't matter that we are iterating over a non-stable order
// here to do this, it doesn't matter which order the functions are deleted
// in.
- bool Changed = false;
- for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
- E = FunctionsToRemove.end(); I != E; ++I) {
- resetCachedCostInfo((*I)->getFunction());
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end();
+ I != E; ++I) {
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
- Changed = true;
}
-
- return Changed;
+ return true;
}
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 7cb1d18f933d..cd29e7a7a7da 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -122,6 +122,9 @@ bool InternalizePass::runOnModule(Module &M) {
bool Changed = false;
+ // Never internalize functions which code-gen might insert.
+ ExternalNames.insert("__stack_chk_fail");
+
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -148,9 +151,11 @@ bool InternalizePass::runOnModule(Module &M) {
// won't find them. (see MachineModuleInfo.)
ExternalNames.insert("llvm.global_ctors");
ExternalNames.insert("llvm.global_dtors");
- ExternalNames.insert("llvm.noinline");
ExternalNames.insert("llvm.global.annotations");
+ // Never internalize symbols code-gen inserts.
+ ExternalNames.insert("__stack_chk_guard");
+
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
new file mode 100644
index 000000000000..b18c9150f440
--- /dev/null
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/IPO/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IPO
+parent = Transforms
+library_name = ipo
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8fdfd72237f5..a1b0a4580bf5 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -22,14 +22,19 @@
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
+static cl::opt<bool>
+RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -38,6 +43,7 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
+ Vectorize = RunVectorization;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -101,6 +107,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -110,6 +117,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addInitialAliasAnalysisPasses(MPM);
if (!DisableUnitAtATime) {
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
MPM.add(createIPSCCPPass()); // IP SCCP
@@ -170,6 +179,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+ if (Vectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ }
+
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
@@ -186,11 +202,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
}
+ addExtensionsToPM(EP_OptimizerLast, MPM);
}
void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
bool Internalize,
- bool RunInliner) {
+ bool RunInliner,
+ bool DisableGVNLoadPRE) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -246,9 +264,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsModRefPass()); // IP alias analysis.
- PM.add(createLICMPass()); // Hoist loop invariants.
- PM.add(createGVNPass()); // Remove redundancies.
- PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
@@ -340,4 +358,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
PassManagerBase *LPM = unwrap(PM);
Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
}
-
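A sketch, not part of the imported diff, of how the vectorization support added above might be driven from the C++ API rather than the -vectorize flag. It assumes the PassManagerBuilder of this revision exposes the Vectorize field that the constructor seeds from the command-line option; header paths match this era of LLVM.

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// Rough sketch: request the basic-block vectorizer through the builder.
static void buildPipeline(PassManager &MPM) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 3;
  PMB.Vectorize = true;   // same effect as passing -vectorize on the command line
  PMB.populateModulePassManager(MPM);
}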
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index cbb80f075087..c8cc8fd1930b 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -101,8 +101,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// Check to see if this function performs an unwind or calls an
// unwinding function.
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) ||
- isa<ResumeInst>(BB->getTerminator()))) {
+ if (CheckUnwind && isa<ResumeInst>(BB->getTerminator())) {
// Uses unwind / resume!
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a46d5adc0ab4..d070ccc0d63f 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-add_llvm_library_dependencies(LLVMInstCombine
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 38082787ce4d..199df519ce07 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -22,6 +22,7 @@
namespace llvm {
class CallSite;
class TargetData;
+ class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
@@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
+ TargetLibraryInfo *TLI;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -92,9 +94,11 @@ public:
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
TargetData *getTargetData() const { return TD; }
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
@@ -287,9 +291,9 @@ public:
return 0; // Don't do anything with FI
}
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ void ComputeMaskedBits(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d10046c10baf..05e702fa43b5 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -136,6 +136,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
return BinaryOperator::CreateAShr(NewShl, ShAmt);
}
+
+ // If this is a xor that was canonicalized from a sub, turn it back into
+ // a sub and fuse this add with it.
+ if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
+ IntegerType *IT = cast<IntegerType>(I.getType());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
+ XorLHS);
+ }
}
}
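An illustrative source-level view, not from the diff, of the xor-of-low-mask fold added above: when the operand's set bits are known to lie inside the constant mask, x ^ mask equals mask - x, so the xor followed by an add collapses into a single subtraction. The function below is hypothetical.

// Hypothetical illustration of the fold: with the top bits of x known zero,
// (x ^ 0xF) + 1 is rewritten by the pass to (0xF + 1) - x, i.e. 16 - x.
static unsigned xorMaskAdd(unsigned x) {
  x &= 0xF;               // ComputeMaskedBits would see bits 4+ as known zero
  return (x ^ 0xF) + 1;   // equivalent to 16 - x for x in [0, 15]
}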
@@ -189,14 +201,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// A+B --> A|B iff A and B have no bits set in common.
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
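A small illustrative example, not from the diff, of the "A+B --> A|B iff no common bits" fold above: when ComputeMaskedBits proves the operands use disjoint bit ranges, the addition can never carry, so it is the same as a bitwise or.

// Hypothetical example: the two operands occupy disjoint nibbles.
static unsigned packNibbles(unsigned hi, unsigned lo) {
  unsigned A = (hi & 0xF) << 4;   // known zero in bits 0-3 and 8 upward
  unsigned B = lo & 0xF;          // known zero in bits 4 upward
  return A + B;                   // == A | B, e.g. 0x30 + 0x0A == 0x3A
}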
@@ -466,57 +477,57 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
- GetElementPtrInst *GEP = 0;
- ConstantExpr *CstGEP = 0;
-
- // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ GEPOperator *GEP1 = 0, *GEP2 = 0;
+
// For now we require one side to be the base pointer "A" or a constant
- // expression derived from it.
- if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // GEP derived from it.
+ if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
// (gep X, ...) - X
if (LHSGEP->getOperand(0) == RHS) {
- GEP = LHSGEP;
+ GEP1 = LHSGEP;
Swapped = false;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
- // (gep X, ...) - (ce_gep X, ...)
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- LHSGEP->getOperand(0) == CE->getOperand(0)) {
- CstGEP = CE;
- GEP = LHSGEP;
+ } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+ RHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = RHSGEP;
+ GEP1 = LHSGEP;
Swapped = false;
}
}
}
- if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// X - (gep X, ...)
if (RHSGEP->getOperand(0) == LHS) {
- GEP = RHSGEP;
+ GEP1 = RHSGEP;
Swapped = true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
- // (ce_gep X, ...) - (gep X, ...)
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- RHSGEP->getOperand(0) == CE->getOperand(0)) {
- CstGEP = CE;
- GEP = RHSGEP;
+ } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (RHSGEP->getOperand(0)->stripPointerCasts() ==
+ LHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = LHSGEP;
+ GEP1 = RHSGEP;
Swapped = true;
}
}
}
- if (GEP == 0)
+ // Avoid duplicating the arithmetic if GEP2 has non-constant indices and
+ // multiple users.
+ if (GEP1 == 0 ||
+ (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
return 0;
// Emit the offset of the GEP and an intptr_t.
- Value *Result = EmitGEPOffset(GEP);
+ Value *Result = EmitGEPOffset(GEP1);
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
- if (CstGEP) {
- Value *CstOffset = EmitGEPOffset(CstGEP);
- Result = Builder->CreateSub(Result, CstOffset);
+ if (GEP2) {
+ Value *Offset = EmitGEPOffset(GEP2);
+ Result = Builder->CreateSub(Result, Offset);
}
-
// If we have p - gep(p, ...) then we have to negate the result.
if (Swapped)
@@ -587,6 +598,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2;
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5e0bfe8e26d2..0dbe11d2f01f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Intrinsics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -62,50 +63,6 @@ static inline Value *dyn_castNotVal(Value *V) {
return 0;
}
-
-/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
-/// are carefully arranged to allow folding of expressions such as:
-///
-/// (A < B) | (A > B) --> (A != B)
-///
-/// Note that this is only valid if the first and second predicates have the
-/// same sign. Is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-/// 0 A > B
-/// 1 A == B
-/// 2 A < B
-///
-/// <=> Value Definition
-/// 000 0 Always false
-/// 001 1 A > B
-/// 010 2 A == B
-/// 011 3 A >= B
-/// 100 4 A < B
-/// 101 5 A != B
-/// 110 6 A <= B
-/// 111 7 Always true
-///
-static unsigned getICmpCode(const ICmpInst *ICI) {
- switch (ICI->getPredicate()) {
- // False -> 0
- case ICmpInst::ICMP_UGT: return 1; // 001
- case ICmpInst::ICMP_SGT: return 1; // 001
- case ICmpInst::ICMP_EQ: return 2; // 010
- case ICmpInst::ICMP_UGE: return 3; // 011
- case ICmpInst::ICMP_SGE: return 3; // 011
- case ICmpInst::ICMP_ULT: return 4; // 100
- case ICmpInst::ICMP_SLT: return 4; // 100
- case ICmpInst::ICMP_NE: return 5; // 101
- case ICmpInst::ICMP_ULE: return 6; // 110
- case ICmpInst::ICMP_SLE: return 6; // 110
- // True -> 7
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- return 0;
- }
-}
-
/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
/// predicate into a three bit mask. It also returns whether it is an ordered
/// predicate by reference.
@@ -130,31 +87,19 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
default:
// Not expecting FCMP_FALSE and FCMP_TRUE;
llvm_unreachable("Unexpected FCmp predicate!");
- return 0;
}
}
-/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// getNewICmpValue - This is the complement of getICmpCode, which turns an
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy *Builder) {
- CmpInst::Predicate Pred;
- switch (Code) {
- default: assert(0 && "Illegal ICmp code!");
- case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: Pred = ICmpInst::ICMP_EQ; break;
- case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: Pred = ICmpInst::ICMP_NE; break;
- case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
- case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
- }
- return Builder->CreateICmp(Pred, LHS, RHS);
+static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate NewPred;
+ if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
+ return NewConstant;
+ return Builder->CreateICmp(NewPred, LHS, RHS);
}
/// getFCmpValue - This is the complement of getFCmpCode, which turns an
@@ -165,7 +110,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
InstCombiner::BuilderTy *Builder) {
CmpInst::Predicate Pred;
switch (code) {
- default: assert(0 && "Illegal FCmp code!");
+ default: llvm_unreachable("Illegal FCmp code!");
case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
@@ -180,14 +125,6 @@ static Value *getFCmpValue(bool isordered, unsigned code,
return Builder->CreateFCmp(Pred, LHS, RHS);
}
-/// PredicatesFoldable - Return true if both predicates match sign or if at
-/// least one of them is an equality comparison (which is signless).
-static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
// guaranteed to be a binary operator.
@@ -558,6 +495,38 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
+/// if possible. The returned predicate is either == or !=. Returns false if
+/// decomposition fails.
+static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SLT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isZero()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ Z = C;
+ return true;
+ }
+
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SGT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isAllOnesValue()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+ }
+
+ return false;
+}
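An illustrative pair of comparisons, not from the diff, showing what the decomposition above enables once it feeds foldLogOpOfMaskedICmps: sign tests become (X & SignBit) equality checks, so conjunctions of them fold to a single masked test. This is the generalization that lets the hand-written sign-bit special cases be removed further down in this diff.

// Hypothetical C-level view of the fold.
static bool bothNegativeBefore(int a, int b) { return (a < 0) && (b < 0); }
static bool bothNegativeAfter(int a, int b) {
  // a < 0 decomposes to (a & SignBit) != 0, likewise for b, so the masked-icmp
  // fold collapses the conjunction into one sign test of a & b.
  return (a & b) < 0;
}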
+
/// foldLogOpOfMaskedICmpsHelper:
/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// return the set of pattern classes (from MaskedICmpType)
@@ -565,10 +534,9 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value*& B, Value*& C,
Value*& D, Value*& E,
- ICmpInst *LHS, ICmpInst *RHS) {
- ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
- if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
- if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate &LHSCC,
+ ICmpInst::Predicate &RHSCC) {
if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
// vectors are not (yet?) supported
if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
@@ -582,40 +550,60 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value *L1 = LHS->getOperand(0);
Value *L2 = LHS->getOperand(1);
Value *L11,*L12,*L21,*L22;
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ // Check whether the icmp can be decomposed into a bit test.
+ if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
+ L21 = L22 = L1 = 0;
+ } else {
+ // Look for ANDs in the LHS icmp.
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ } else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
L21 = L22 = 0;
- }
- else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
- L21 = L22 = 0;
+ }
}
+ // Bail if LHS was an icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(LHSCC))
+ return 0;
+
Value *R1 = RHS->getOperand(0);
Value *R2 = RHS->getOperand(1);
Value *R11,*R12;
bool ok = false;
- if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
- if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
- A = R11; D = R12; E = R2; ok = true;
+ if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11;
+ } else {
+ return 0;
}
- else
- if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ E = R2; R1 = 0; ok = true;
+ } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R2; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
A = R12; D = R11; E = R2; ok = true;
}
}
+
+ // Bail if RHS was an icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(RHSCC))
+ return 0;
+
+ // Look for ANDs on the right side of the RHS icmp.
if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
- if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
- A = R11; D = R12; E = R1; ok = true;
- }
- else
- if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R1; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
A = R12; D = R11; E = R1; ok = true;
- }
- else
+ } else {
return 0;
+ }
}
if (!ok)
return 0;
@@ -644,8 +632,12 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate NEWCC,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
- unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
+ LHSCC, RHSCC);
if (mask == 0) return 0;
+ assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
+ "foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
if (NEWCC == ICmpInst::ICMP_NE)
mask >>= 1; // treat "Not"-states as normal states
@@ -693,11 +685,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
- if (LHS->getPredicate() != NEWCC)
+ if (LHSCC != NEWCC)
CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
if (ECst == 0) return 0;
- if (RHS->getPredicate() != NEWCC)
+ if (RHSCC != NEWCC)
ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
ConstantInt* MCst = dyn_cast<ConstantInt>(
ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
@@ -728,7 +720,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -756,24 +748,12 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
- if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
- Value *NewAnd = Builder->CreateAnd(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
- }
-
- // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
- if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
- }
}
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
// where CMAX is the all ones value for the truncated type,
// iff the lower bits of C2 and CA are zero.
- if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
LHS->hasOneUse() && RHS->hasOneUse()) {
Value *V;
ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
@@ -805,7 +785,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
}
-
+
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
@@ -1382,13 +1362,8 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// part of the value (e.g. byte 3) then it must be shifted right. If from the
// low part, it must be shifted left.
unsigned DestByteNo = InputByteNo + OverallLeftShift;
- if (InputByteNo < ByteValues.size()/2) {
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
- return true;
- } else {
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
- return true;
- }
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
// If the destination byte value is already defined, the values are or'd
// together, which isn't a bswap (unless it's an or of the same bits).
@@ -1469,7 +1444,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -1490,18 +1465,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
- if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
- }
-
- // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
- if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
- Value *NewAnd = Builder->CreateAnd(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
- }
}
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
@@ -1586,7 +1549,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
return ConstantInt::getTrue(LHS->getContext());
}
- break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
@@ -1962,8 +1924,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// Canonicalize xor to the RHS.
- if (match(Op0, m_Xor(m_Value(), m_Value())))
+ bool SwappedForXor = false;
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) {
std::swap(Op0, Op1);
+ SwappedForXor = true;
+ }
// A | ( A ^ B) -> A | B
// A | (~A ^ B) -> A | ~B
@@ -1994,6 +1959,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
+ if (SwappedForXor)
+ std::swap(Op0, Op1);
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (Value *Res = FoldOrOfICmps(LHS, RHS))
@@ -2281,7 +2249,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
return ReplaceInstUsesWith(I,
- getICmpValue(isSigned, Code, Op0, Op1, Builder));
+ getNewICmpValue(isSigned, Code, Op0, Op1,
+ Builder));
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c7b3ff8504ac..77e47271008c 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -37,26 +37,26 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
MinAlign, false));
return MI;
}
-
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
if (MemOpLength == 0) return 0;
-
+
// Source and destination pointer types are always "i8*" for intrinsic. See
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
unsigned Size = MemOpLength->getZExtValue();
if (Size == 0) return MI; // Delete this mem transfer.
-
+
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
-
+
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
@@ -66,7 +66,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
-
+
// Memcpy forces the use of i8* for the source and destination. That means
// that if you're using memcpy to move one double around, you'll get a cast
// from double* to i8*. We'd much rather use a double load+store rather than
@@ -94,20 +94,20 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
} else
break;
}
-
+
if (SrcETy->isSingleValueType()) {
NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
}
}
}
-
-
+
+
// If the memcpy/memmove provides better alignment info than we can
// infer, use it.
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
-
+
Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
@@ -127,7 +127,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
Alignment, false));
return MI;
}
-
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -135,14 +135,14 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return 0;
uint64_t Len = LenC->getZExtValue();
Alignment = MI->getAlignment();
-
+
// If the length is zero, this is a no-op
if (Len == 0) return MI; // memset(d,c,0,a) -> noop
-
+
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
-
+
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
@@ -150,13 +150,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
-
+
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
MI->isVolatile());
S->setAlignment(Alignment);
-
+
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
@@ -165,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return 0;
}
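A concrete, hypothetical instance of the memset fold above for a 4-byte fill: the fill byte is replicated across the store width (0xAB * 0x01010101 = 0xABABABAB) and emitted as a single integer store.

#include <cstdint>
#include <cstring>

// Hypothetical source-level view of the memset -> store rewrite.
static void beforeFold(unsigned char *p) { std::memset(p, 0xAB, 4); }
static void afterFold(unsigned char *p) {
  uint32_t Fill = 0xABABABABu;          // replicated fill pattern
  std::memcpy(p, &Fill, sizeof Fill);   // stands in for the single i32 store
}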
-/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
@@ -182,7 +182,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
CI.setDoesNotThrow();
return &CI;
}
-
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
@@ -203,7 +203,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// alignment is sufficient.
}
}
-
+
// No other transformations apply to volatile transfers.
if (MI->isVolatile())
return 0;
@@ -242,13 +242,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
-
+
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
// We need target data for just about everything so depend on it.
if (!TD) break;
-
+
Type *ReturnTy = CI.getType();
uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
@@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return 0;
Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -322,7 +324,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap)
return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
-
+
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
@@ -334,7 +336,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return new TruncInst(V, TI->getType());
}
}
-
+
break;
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
@@ -359,14 +361,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
- KnownZero, KnownOne);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
if ((Mask & KnownZero) == Mask)
return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
APInt(BitWidth, TrailingZeros)));
-
+
}
break;
case Intrinsic::ctlz: {
@@ -378,31 +379,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
- KnownZero, KnownOne);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
if ((Mask & KnownZero) == Mask)
return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
APInt(BitWidth, LeadingZeros)));
-
+
}
break;
case Intrinsic::uadd_with_overflow: {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
- APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
@@ -448,7 +447,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X + undef -> undef
if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
@@ -469,7 +468,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (isa<UndefValue>(II->getArgOperand(0)) ||
isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X - 0 -> {X, false}
if (RHS->isZero()) {
@@ -477,7 +476,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct =
+ Constant *Struct =
ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
@@ -486,14 +485,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::umul_with_overflow: {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
// Get the largest possible values for each operand.
APInt LHSMax = ~LHSKnownZero;
@@ -526,19 +524,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X * undef -> undef
if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X*0 -> {0, false}
if (RHSI->isZero())
return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
-
+
// X * 1 -> {X, false}
if (RHSI->equalsInt(1)) {
Constant *V[] = {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct =
+ Constant *Struct =
ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
@@ -557,7 +555,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
- Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
@@ -568,7 +566,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
- Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
return new StoreInst(II->getArgOperand(1), Ptr);
@@ -621,19 +619,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
-
+ if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
+ assert(Mask->getType()->getVectorNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
// Check that all of the elements are integer constants or undefs.
bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (Elt == 0 ||
+ !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
AllEltsOk = false;
break;
}
}
-
+
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
@@ -641,23 +641,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
-
+
// Only extract each element once.
Value *ExtractedElts[32];
memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
+
for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
-
+
if (ExtractedElts[Idx] == 0) {
- ExtractedElts[Idx] =
+ ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
Builder->getInt32(Idx&15));
}
-
+
// Insert this value into the result vector.
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
Builder->getInt32(i));
@@ -703,7 +704,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return EraseInstFromFunction(CI);
}
}
-
+
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI = II;
@@ -728,12 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
}
-
+
// If the stack restore is in a return or resume block and if there
// are no allocas or calls between the restore and the return, nuke the
// restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) ||
- isa<UnwindInst>(TI)))
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
return EraseInstFromFunction(CI);
break;
}
@@ -748,7 +748,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
return visitCallSite(&II);
}
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
const CastInst * const CI,
@@ -760,10 +760,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// The size of ByVal arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
- if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ if (!CS.isByValArgument(ix))
return true;
- Type* SrcTy =
+ Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
@@ -807,7 +807,7 @@ public:
} // end anonymous namespace
// Try to fold some different type of calls here.
-// Currently we're only working with the checking functions, memcpy_chk,
+// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
@@ -916,7 +916,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
!CalleeF->isDeclaration()) {
Instruction *OldCall = CS.getInstruction();
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
OldCall);
// If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust themselves.
@@ -924,7 +924,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall))
return EraseInstFromFunction(*OldCall);
-
+
// We cannot remove an invoke, because it would change the CFG, just
// change the callee to a null pointer.
cast<InvokeInst>(OldCall)->setCalledFunction(
@@ -960,7 +960,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
@@ -1061,17 +1061,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
if (Attrs & Attribute::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
-
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
-
+
Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
@@ -1099,8 +1099,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
return false;
+
+ // If both the callee and the cast type are varargs, we still have to make
+ // sure the number of fixed parameters is the same, or we have the same
+ // ABI issues as if we introduced a varargs call.
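+ // For example (illustrative): a call to a function declared "void (i32, ...)"
+ // made through a bitcast to "void (i32, i32, ...)*" is left alone, since the
+ // second argument would move between the fixed and variadic parts of the ABI.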
+ if (FT->isVarArg() &&
+ cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+ FT->getNumParams() !=
+ cast<FunctionType>(APTy->getElementType())->getNumParams())
+ return false;
}
-
+
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty())
// In this case we have more arguments than the new function type, but we
@@ -1114,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false;
}
-
+
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
std::vector<Value*> Args;
@@ -1352,11 +1361,11 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
NestF->getType() == PointerType::getUnqual(NewFTy) ?
- NestF : ConstantExpr::getBitCast(NestF,
+ NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
NewAttrs.end());
@@ -1385,9 +1394,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
Constant *NewCallee =
- NestF->getType() == PTy ? NestF :
+ NestF->getType() == PTy ? NestF :
ConstantExpr::getBitCast(NestF, PTy);
CS.setCalledFunction(NewCallee);
return CS.getInstruction();
}
-
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f10e48abf108..39279f437205 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-
-
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
@@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
return C;
}
@@ -216,7 +215,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
- break;
}
Res->takeName(I);
@@ -528,9 +526,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, In);
}
-
-
-
+
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X == 1) to i32 --> X iff X has only the low bit set.
@@ -545,8 +541,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+ ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -594,9 +589,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+ ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -915,8 +909,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+ ComputeMaskedBits(Op0, KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
@@ -1163,6 +1156,9 @@ static Value *LookThroughFPExtensions(Value *V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
+ // See if the value can be truncated to half and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ return V;
// See if the value can be truncated to float and then reextended.
if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
return V;
@@ -1213,10 +1209,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() &&
- Call->getCalledFunction()->getName() == "sqrt" &&
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1423,16 +1418,15 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
- SmallVector<Constant*, 16> ShuffleMask;
+ SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
- IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
- ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+ ShuffleMask.push_back(i);
} else {
// If we're increasing the number of elements, shuffle in all of the
@@ -1441,14 +1435,16 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
V2 = Constant::getNullValue(SrcTy);
unsigned SrcElts = SrcTy->getNumElements();
for (unsigned i = 0, e = SrcElts; i != e; ++i)
- ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+ ShuffleMask.push_back(i);
// The excess elements reference the first element of the zero input.
- ShuffleMask.append(DestTy->getNumElements()-SrcElts,
- ConstantInt::get(Int32Ty, SrcElts));
+ for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
+ ShuffleMask.push_back(SrcElts);
}
- return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
+ return new ShuffleVectorInst(InVal, V2,
+ ConstantDataVector::get(V2->getContext(),
+ ShuffleMask));
}
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb1cbfade34d..ab2987ff24cd 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -203,8 +203,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// We need TD information to know the pointer size unless this is inbounds.
if (!GEP->isInBounds() && TD == 0) return 0;
- ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
- if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return 0;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array.
@@ -221,7 +225,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// structs.
SmallVector<unsigned, 4> LaterIndices;
- Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (Idx == 0) return 0; // Variable index.
@@ -272,8 +276,9 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
- Constant *Elt = Init->getOperand(i);
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
+ Constant *Elt = Init->getAggregateElement(i);
+ if (Elt == 0) return 0;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
@@ -284,7 +289,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD);
+ CompareRHS, TD, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -440,10 +445,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If a 32-bit or 64-bit magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
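// For example (illustrative): for a 4-element constant array {0, 7, 0, 3}
// compared for equality with 0, the magic constant is 5 (0b0101, elements 0
// and 2 match), so the load and compare become a test of bit i in that constant.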
- if (Init->getNumOperands() <= 32 ||
- (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ if (ArrayElementCount <= 32 ||
+ (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
Type *Ty;
- if (Init->getNumOperands() <= 32)
+ if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
else
Ty = Type::getInt64Ty(Init->getContext());
@@ -566,6 +571,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
Instruction &I) {
+ // Don't transform signed compares of GEPs into index compares. Even if the
+ // GEP is inbounds, the final add of the base pointer can have signed overflow
+ // and would change the result of the icmp.
+ // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
+ // the maximum signed value for the pointer type.
+ if (ICmpInst::isSigned(Cond))
+ return 0;
+
// Look through bitcasts.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
RHS = BCI->getOperand(0);
@@ -602,6 +615,20 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+ // If we're comparing GEPs with two base pointers that only differ in type
+ // and both GEPs have only constant indices or just one use, then fold
+ // the compare with the adjusted indices.
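+ // For example (an illustrative sketch, with %struct.S = type { i32, i32 }):
+ //   %g1 = getelementptr inbounds %struct.S* %s, i64 0, i32 1
+ //   %p  = bitcast %struct.S* %s to i32*
+ //   %g2 = getelementptr inbounds i32* %p, i64 1
+ //   %c  = icmp eq i32* %g1, %g2
+ // Both offsets are 4, so %c folds to true.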
+ if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
+ (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
+ PtrBase->stripPointerCasts() ==
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
+ Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
+ EmitGEPOffset(GEPLHS),
+ EmitGEPOffset(GEPRHS));
+ return ReplaceInstUsesWith(I, Cmp);
+ }
+
// Otherwise, the base pointers are different and the indices are
// different, bail out.
return 0;
@@ -1001,9 +1028,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// of the high bits truncated out of x are known.
unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+ ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne);
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1657,6 +1683,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
@@ -1787,6 +1821,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // Comparing -val or val with non-zero is the same as just comparing val,
+ // i.e., abs(val) != 0 -> val != 0.
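+ // For example (illustrative; %x and %c are placeholders):
+ //   %neg = sub i32 0, %x
+ //   %abs = select i1 %c, i32 %neg, i32 %x
+ //   %res = icmp ne i32 %abs, 0
+ // can be replaced by "%res = icmp ne i32 %x, 0" for any %c.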
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
@@ -2683,6 +2735,17 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
+ } else {
+ // See if the RHS value is < UnsignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
+ Pred == ICmpInst::ICMP_UGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
}
// Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
@@ -2822,7 +2885,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
const fltSemantics *Sem;
// FIXME: This shouldn't be here.
- if (LHSExt->getSrcTy()->isFloatTy())
+ if (LHSExt->getSrcTy()->isHalfTy())
+ Sem = &APFloat::IEEEhalf;
+ else if (LHSExt->getSrcTy()->isFloatTy())
Sem = &APFloat::IEEEsingle;
else if (LHSExt->getSrcTy()->isDoubleTy())
Sem = &APFloat::IEEEdouble;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 7446a51a4db1..b2f2e248e417 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -22,6 +22,72 @@ using namespace llvm;
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+// Try to kill dead allocas by walking through their uses until we see some use
+// that could escape. This is a conservative analysis which tries to handle
+// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things
+// left after inlining and SROA finish chewing on an alloca.
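+// For example (an illustrative sketch):
+//   %buf = alloca [16 x i8]
+//   %p = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+//   call void @llvm.lifetime.start(i64 16, i8* %p)
+//   store i8 0, i8* %p
+//   call void @llvm.lifetime.end(i64 16, i8* %p)
+// Here %buf never escapes, so the store and the alloca itself can be deleted.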
+static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) {
+ SmallVector<Instruction *, 4> Worklist, DeadStores;
+ Worklist.push_back(&AI);
+ do {
+ Instruction *PI = Worklist.pop_back_val();
+ for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end();
+ UI != UE; ++UI) {
+ Instruction *I = cast<Instruction>(*UI);
+ switch (I->getOpcode()) {
+ default:
+ // Give up the moment we see something we can't handle.
+ return 0;
+
+ case Instruction::GetElementPtr:
+ case Instruction::BitCast:
+ Worklist.push_back(I);
+ continue;
+
+ case Instruction::Call:
+ // We can handle a limited subset of calls to no-op intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ continue;
+ default:
+ return 0;
+ }
+ }
+ // Reject everything else.
+ return 0;
+
+ case Instruction::Store: {
+ // Stores into the alloca are only live if the alloca is live.
+ StoreInst *SI = cast<StoreInst>(I);
+ // We can eliminate atomic stores, but not volatile.
+ if (SI->isVolatile())
+ return 0;
+ // The store is only trivially safe if the pointer is the destination
+ // as opposed to the value. We're conservative here and don't check for
+ // the case where we store the address of a dead alloca into a dead
+ // alloca.
+ if (SI->getPointerOperand() != PI)
+ return 0;
+ DeadStores.push_back(I);
+ continue;
+ }
+ }
+ }
+ } while (!Worklist.empty());
+
+ // The alloca is dead. Kill off all the stores to it, and then replace it
+ // with undef.
+ while (!DeadStores.empty())
+ IC.EraseInstFromFunction(*DeadStores.pop_back_val());
+ return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType()));
+}
+
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
@@ -81,7 +147,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
}
- return 0;
+ // Try to aggressively remove allocas which are only used for GEPs, lifetime
+ // markers, and stores. This happens when SROA iteratively promotes stores
+ // out of the alloca, and we need to clean up after it.
+ return removeDeadAlloca(*this, AI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7f48125a97ab..5168e2a113ca 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -256,22 +256,18 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // Simplify mul instructions with a constant RHS...
+ // Simplify mul instructions with a constant RHS.
if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
if (Op1F->isExactlyValue(1.0))
return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
- } else if (Op1C->getType()->isVectorTy()) {
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
+ } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
}
// Try to fold constant mul into select arguments.
@@ -441,19 +437,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
- if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ const APInt *C;
+ if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
+ C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
}
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
Value *IC = Builder->CreateICmpULT(Op0, C);
@@ -684,28 +684,36 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
// If it's a constant vector, flip any negative values positive.
- if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
- unsigned VWidth = RHSV->getNumOperands();
+ if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
+ Constant *C = cast<Constant>(Op1);
+ unsigned VWidth = C->getType()->getVectorNumElements();
bool hasNegative = false;
- for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ bool hasMissing = false;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) {
+ hasMissing = true;
+ break;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
if (RHS->isNegative())
hasNegative = true;
+ }
- if (hasNegative) {
- std::vector<Constant *> Elts(VWidth);
+ if (hasNegative && !hasMissing) {
+ SmallVector<Constant *, 16> Elts(VWidth);
for (unsigned i = 0; i != VWidth; ++i) {
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
if (RHS->isNegative())
Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
- else
- Elts[i] = RHS;
}
}
Constant *NewRHSV = ConstantVector::get(Elts);
- if (NewRHSV != RHSV) {
+ if (NewRHSV != C) { // Don't loop on -MININT
Worklist.AddValue(I.getOperand(1));
I.setOperand(1, NewRHSV);
return &I;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 91e60a4fb244..e727b2c592db 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -184,7 +184,6 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
}
llvm_unreachable("Shouldn't get here");
- return 0;
}
static bool isSelect01(Constant *C1, Constant *C2) {
@@ -282,7 +281,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -294,17 +294,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -330,7 +332,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD);
+ ConstOps, TD, TLI);
}
}
@@ -479,18 +481,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -679,6 +681,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
// Selecting between two integer constants?
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 6d85adde9b85..b31049e59f18 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ IC.getTargetLibraryInfo());
return V;
}
@@ -198,7 +199,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
IC.Worklist.Add(I);
switch (I->getOpcode()) {
- default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -535,12 +536,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
- uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
-
IntegerType *Ty = cast<IntegerType>(I.getType());
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
// If this is oversized composite shift, then unsigned shifts get 0, ashr
// saturates.
if (AmtSum >= TypeBits) {
@@ -576,7 +576,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ShiftOp->getOpcode() != Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->isExact()) {
+ // (X >>?,exact C1) << C2 --> X << (C2-C1)
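+ // e.g. (illustrative): (lshr exact i32 %x, 3) << 5 becomes shl i32 %x, 2,
+ // since "exact" guarantees the three bits shifted out were zero.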
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
@@ -586,15 +595,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr &&
ShiftOp->getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
- Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
+ X, ShiftDiffCst);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
+ X, ShiftDiffCst);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
+ }
+ }
} else {
assert(ShiftAmt2 < ShiftAmt1);
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
@@ -602,9 +630,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
if (I.getOpcode() == Instruction::Shl &&
ShiftOp->getOpcode() != Instruction::Shl) {
- Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
- ConstantInt::get(Ty, ShiftDiff));
-
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->isExact()) {
+ // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+ BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
@@ -613,14 +648,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr &&
ShiftOp->getOpcode() == Instruction::Shl) {
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
- // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
}
}
return 0;
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 5cd9a4b7954c..125c74a89a11 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -142,7 +142,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
return 0; // Only analyze instructions.
}
@@ -156,10 +156,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -180,10 +178,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -206,7 +202,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
return 0;
}
@@ -219,7 +215,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
@@ -567,9 +563,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
}
+
// Otherwise just hand the sub off to ComputeMaskedBits to fill in
// the known zeros and ones.
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
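+ // e.g. (illustrative): "sub i32 7, %x" becomes "xor i32 %x, 7" when all but
+ // the low three bits of %x are known to be zero.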
+ if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ APInt I0 = C0->getValue();
+ if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
+ Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
+ return InsertNewInstWith(Xor, *I);
+ }
+ }
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -671,8 +678,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
- Instruction *NewVal = BinaryOperator::CreateLShr(
- I->getOperand(0), SA, I->getName());
+ BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
+ SA, I->getName());
+ NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
return InsertNewInstWith(NewVal, *I);
} else if ((KnownOne & SignBit) != 0) { // New bits are known one.
KnownOne |= HighBits;
@@ -717,10 +725,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
- APInt Mask2 = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero |= LHSKnownZero;
@@ -783,7 +789,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return 0;
}
}
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
break;
}
@@ -822,46 +828,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
UndefElts = 0;
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+
+ // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i)
+
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
- } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ continue;
+ }
+
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) return 0;
+
+ if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
} else { // Otherwise, defined.
- Elts.push_back(CV->getOperand(i));
+ Elts.push_back(Elt);
}
-
- // If we changed the constant, return it.
- Constant *NewCP = ConstantVector::get(Elts);
- return NewCP != CV ? NewCP : 0;
- }
-
- if (isa<ConstantAggregateZero>(V)) {
- // Simplify the CAZ to a ConstantVector where the non-demanded elements are
- // set to undef.
-
- // Check if this is identity. If so, return 0 since we are not simplifying
- // anything.
- if (DemandedElts.isAllOnesValue())
- return 0;
-
- Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Zero = Constant::getNullValue(EltTy);
- Constant *Undef = UndefValue::get(EltTy);
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elt = DemandedElts[i] ? Zero : Undef;
- Elts.push_back(Elt);
}
- UndefElts = DemandedElts ^ EltMask;
- return ConstantVector::get(Elts);
+
+ // If we changed the constant, return it.
+ Constant *NewCV = ConstantVector::get(Elts);
+ return NewCV != C ? NewCV : 0;
}
// Limit search depth.
@@ -977,7 +976,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (NewUndefElts) {
// Add additional discovered undefs.
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 154267c03465..cf60f0f426dc 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -16,16 +16,16 @@
using namespace llvm;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.
+/// is to leave as a vector operation. isConstant indicates whether we're
+/// extracting one known element. If false we're extracting a variable index.
static bool CheapToScalarize(Value *V, bool isConstant) {
- if (isa<ConstantAggregateZero>(V))
- return true;
- if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
- // If all elts are the same, we can extract.
- Constant *Op0 = C->getOperand(0);
- for (unsigned i = 1; i < C->getNumOperands(); ++i)
- if (C->getOperand(i) != Op0)
+
+ // If all elts are the same, we can extract it and use any of the values.
+ Constant *Op0 = C->getAggregateElement(0U);
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
+ if (C->getAggregateElement(i) != Op0)
return false;
return true;
}
@@ -53,41 +53,18 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
return false;
}
-/// getShuffleMask - Read and decode a shufflevector mask.
-/// Turn undef elements into negative values.
-static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
- unsigned NElts = SVI->getType()->getNumElements();
- if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
- return std::vector<int>(NElts, 0);
- if (isa<UndefValue>(SVI->getOperand(2)))
- return std::vector<int>(NElts, -1);
-
- std::vector<int> Result;
- const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
- if (isa<UndefValue>(*i))
- Result.push_back(-1); // undef
- else
- Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
- return Result;
-}
-
/// FindScalarElement - Given a vector and an element number, see if the scalar
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
- VectorType *PTy = cast<VectorType>(V->getType());
- unsigned Width = PTy->getNumElements();
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
if (EltNo >= Width) // Out of range access.
- return UndefValue::get(PTy->getElementType());
+ return UndefValue::get(VTy->getElementType());
- if (isa<UndefValue>(V))
- return UndefValue::get(PTy->getElementType());
- if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(PTy->getElementType());
- if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
- return CP->getOperand(EltNo);
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
@@ -106,11 +83,10 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
- int InEl = getShuffleMask(SVI)[EltNo];
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
- return UndefValue::get(PTy->getElementType());
+ return UndefValue::get(VTy->getElementType());
if (InEl < (int)LHSWidth)
return FindScalarElement(SVI->getOperand(0), InEl);
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
@@ -121,27 +97,11 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- // If vector val is undef, replace extract with scalar undef.
- if (isa<UndefValue>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
- // If vector val is constant 0, replace extract with scalar 0.
- if (isa<ConstantAggregateZero>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
- if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with all elements the same, replace EI with
- // that element. When the elements are not identical, we cannot replace yet
- // (we do that below, but only when the index is constant).
- Constant *op0 = C->getOperand(0);
- for (unsigned i = 1; i != C->getNumOperands(); ++i)
- if (C->getOperand(i) != op0) {
- op0 = 0;
- break;
- }
- if (op0)
- return ReplaceInstUsesWith(EI, op0);
- }
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. We handle a known element # below.
+ if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
+ if (CheapToScalarize(C, false))
+ return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
@@ -175,8 +135,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (VectorType *VT =
- dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
@@ -212,10 +171,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Value *Src;
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ SVI->getOperand(0)->getType()->getVectorNumElements();
if (SrcIdx < 0)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -248,7 +207,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
/// elements from either LHS or RHS, return the shuffle mask and true.
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
- std::vector<Constant*> &Mask) {
+ SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
@@ -325,7 +284,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Value *&RHS) {
assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
@@ -335,10 +294,14 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
- } else if (isa<ConstantAggregateZero>(V)) {
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
return V;
- } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
@@ -421,7 +384,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
- std::vector<Constant*> Mask;
+ SmallVector<Constant*, 16> Mask;
Value *RHS = 0;
Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
if (RHS == 0) RHS = UndefValue::get(LHS->getType());
@@ -447,7 +410,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- std::vector<int> Mask = getShuffleMask(&SVI);
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
bool MadeChange = false;
@@ -457,9 +420,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
- if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
- return 0;
-
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
@@ -470,29 +430,34 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
MadeChange = true;
}
+ unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
if (isa<UndefValue>(LHS) && LHS == RHS) {
// shuffle(undef,undef,mask) -> undef.
- return ReplaceInstUsesWith(SVI, LHS);
+ Value* result = (VWidth == LHSWidth)
+ ? LHS : UndefValue::get(SVI.getType());
+ return ReplaceInstUsesWith(SVI, result);
}
// Remap any references to RHS to use LHS.
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0)
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
+ if (Mask[i] < 0) {
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
- else {
- if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
- (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
- Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
- } else {
- Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
- }
+ continue;
+ }
+
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@@ -503,72 +468,204 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
MadeChange = true;
}
- // Analyze the shuffle, are the LHS or RHS and identity shuffles?
- bool isLHSID = true, isRHSID = true;
+ if (VWidth == LHSWidth) {
+ // Analyze the shuffle, are the LHS or RHS and identity shuffles?
+ bool isLHSID = true, isRHSID = true;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == (int)i);
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
- // Eliminate identity shuffles.
- if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
- if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ }
// If the LHS is a shufflevector itself, see if we can combine it with this
- // one without producing an unusual shuffle. Here we are really conservative:
+ // one without producing an unusual shuffle.
+ // Cases that might be simplified:
+ // 1.
+ // x1=shuffle(v1,v2,mask1)
+ // x=shuffle(x1,undef,mask)
+ // ==>
+ // x=shuffle(v1,undef,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
+ // 2.
+ // x1=shuffle(v1,undef,mask1)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == mask1.size()
+ // ==>
+ // x=shuffle(v1,x2,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
+ // 3.
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v2.size() == mask2.size()
+ // ==>
+ // x=shuffle(x1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
+ // 4.
+ // x1=shuffle(v1,undef,mask1)
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == v2.size()
+ // ==>
+ // x=shuffle(v1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
+ //
+ // Here we are really conservative:
// we are absolutely afraid of producing a shuffle mask not in the input
// program, because the code gen may not be smart enough to turn a merged
// shuffle into two specific shuffles: it may produce worse code. As such,
// we only merge two shuffles if the result is either a splat or one of the
- // two input shuffle masks. In this case, merging the shuffles just removes
+ // input shuffle masks. In this case, merging the shuffles just removes
// one instruction, which we know is safe. This is good for things like
- // turning: (splat(splat)) -> splat.
- if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ // turning: (splat(splat)) -> splat, or
+ // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
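+ // For example, a splat of a splat:
+ //   x1 = shuffle(v1, undef, <2,2,2,2>)
+ //   x  = shuffle(x1, undef, <0,3,1,2>)
+ // gives newMask[i] = mask1[mask[i]] = <2,2,2,2>, which is itself a splat of
+ // v1[2], so x can be rewritten as shuffle(v1, undef, <2,2,2,2>).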
+ ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
+ if (LHSShuffle)
+ if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
+ LHSShuffle = NULL;
+ if (RHSShuffle)
+ if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
+ RHSShuffle = NULL;
+ if (!LHSShuffle && !RHSShuffle)
+ return MadeChange ? &SVI : 0;
+
+ Value* LHSOp0 = NULL;
+ Value* LHSOp1 = NULL;
+ Value* RHSOp0 = NULL;
+ unsigned LHSOp0Width = 0;
+ unsigned RHSOp0Width = 0;
+ if (LHSShuffle) {
+ LHSOp0 = LHSShuffle->getOperand(0);
+ LHSOp1 = LHSShuffle->getOperand(1);
+ LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ }
+ if (RHSShuffle) {
+ RHSOp0 = RHSShuffle->getOperand(0);
+ RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ }
+ Value* newLHS = LHS;
+ Value* newRHS = RHS;
+ if (LHSShuffle) {
+ // case 1
if (isa<UndefValue>(RHS)) {
- std::vector<int> LHSMask = getShuffleMask(LHSSVI);
-
- if (LHSMask.size() == Mask.size()) {
- std::vector<int> NewMask;
- bool isSplat = true;
- int SplatElt = -1; // undef
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- int MaskElt;
- if (Mask[i] < 0 || Mask[i] >= (int)e)
- MaskElt = -1; // undef
- else
- MaskElt = LHSMask[Mask[i]];
- // Check if this could still be a splat.
- if (MaskElt >= 0) {
- if (SplatElt >=0 && SplatElt != MaskElt)
- isSplat = false;
- SplatElt = MaskElt;
- }
- NewMask.push_back(MaskElt);
- }
+ newLHS = LHSOp0;
+ newRHS = LHSOp1;
+ }
+ // case 2 or 4
+ else if (LHSOp0Width == LHSWidth) {
+ newLHS = LHSOp0;
+ }
+ }
+ // case 3 or 4
+ if (RHSShuffle && RHSOp0Width == LHSWidth) {
+ newRHS = RHSOp0;
+ }
+ // case 4
+ if (LHSOp0 == RHSOp0) {
+ newLHS = LHSOp0;
+ newRHS = NULL;
+ }
- // If the result mask is equal to the src shuffle or this
- // shuffle mask, do the replacement.
- if (isSplat || NewMask == LHSMask || NewMask == Mask) {
- std::vector<Constant*> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] < 0) {
- Elts.push_back(UndefValue::get(Int32Ty));
- } else {
- Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
- }
- }
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
+ if (newLHS == LHS && newRHS == RHS)
+ return MadeChange ? &SVI : 0;
+
+ SmallVector<int, 16> LHSMask;
+ SmallVector<int, 16> RHSMask;
+ if (newLHS != LHS)
+ LHSMask = LHSShuffle->getShuffleMask();
+ if (RHSShuffle && newRHS != RHS)
+ RHSMask = RHSShuffle->getShuffleMask();
+
+ unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
+ SmallVector<int, 16> newMask;
+ bool isSplat = true;
+ int SplatElt = -1;
+ // Create a new mask for the new ShuffleVectorInst so that the new
+ // ShuffleVectorInst is equivalent to the original one.
+ for (unsigned i = 0; i < VWidth; ++i) {
+ int eltMask;
+ if (Mask[i] == -1) {
+ // This element is an undef value.
+ eltMask = -1;
+ } else if (Mask[i] < (int)LHSWidth) {
+ // This element is from left hand side vector operand.
+ //
+ // If LHS is going to be replaced (case 1, 2, or 4), calculate the
+ // new mask value for the element.
+ if (newLHS != LHS) {
+ eltMask = LHSMask[Mask[i]];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
+ eltMask = -1;
+ }
+ else
+ eltMask = Mask[i];
+ } else {
+ // This element is from right hand side vector operand
+ //
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value. (case 1)
+ if (isa<UndefValue>(RHS))
+ eltMask = -1;
+ // If RHS is going to be replaced (case 3 or 4), calculate the
+ // new mask value for the element.
+ else if (newRHS != RHS) {
+ eltMask = RHSMask[Mask[i]-LHSWidth];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)RHSOp0Width) {
+ assert(isa<UndefValue>(RHSShuffle->getOperand(1))
+ && "should have been check above");
+ eltMask = -1;
}
}
+ else
+ eltMask = Mask[i]-LHSWidth;
+
+ // If LHS's width is changed, shift the mask value accordingly.
+ // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
+ // references to RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ if (eltMask >= 0 && newRHS != NULL)
+ eltMask += newLHSWidth;
+ }
+
+ // Check if this could still be a splat.
+ if (eltMask >= 0) {
+ if (SplatElt >= 0 && SplatElt != eltMask)
+ isSplat = false;
+ SplatElt = eltMask;
+ }
+
+ newMask.push_back(eltMask);
+ }
+
+ // If the result mask is equal to one of the original shuffle masks,
+ // or is a splat, do the replacement.
+ if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+ SmallVector<Constant*, 16> Elts;
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
+ if (newMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
+ }
}
+ if (newRHS == NULL)
+ newRHS = UndefValue::get(newLHS->getType());
+ return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
return MadeChange ? &SVI : 0;
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 32009c39ec25..99a02fc0df3f 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -55,9 +55,9 @@ public:
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (; NumEntries; --NumEntries) {
+ for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ WorklistMap.insert(std::make_pair(I, Idx++));
Worklist.push_back(I);
}
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c15b8058f292..066b2ec89c3e 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
}
char InstCombiner::ID = 0;
-INITIALIZE_PASS(InstCombiner, "instcombine",
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
@@ -490,7 +495,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
@@ -509,7 +514,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
if (ConstantFP *C = dyn_cast<ConstantFP>(V))
return ConstantExpr::getFNeg(C);
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
if (C->getType()->getElementType()->isFloatingPointTy())
return ConstantExpr::getFNeg(C);
@@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
MadeChange = true;
}
- if ((*I)->getType() != IntPtrTy) {
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -909,7 +915,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+
+ // We do not handle pointer-vector geps here.
+ if (!StrippedPtrTy)
+ return 0;
+
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1235,15 +1246,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
- unsigned NumCases = SI.getNumCases();
// Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
AddRHS);
assert(isa<ConstantInt>(NewCaseVal) &&
"Result of expression should be constant");
- SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal));
+ i.setValue(cast<ConstantInt>(NewCaseVal));
}
SI.setCondition(I->getOperand(0));
Worklist.Add(I);
@@ -1260,24 +1271,16 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ReplaceInstUsesWith(EV, Agg);
if (Constant *C = dyn_cast<Constant>(Agg)) {
- if (isa<UndefValue>(C))
- return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
- if (isa<ConstantAggregateZero>(C))
- return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
- // Extract the element indexed by the first index out of the constant
- Value *V = C->getOperand(*EV.idx_begin());
- if (EV.getNumIndices() > 1)
- // Extract the remaining indices out of the constant indexed by the
- // first index
- return ExtractValueInst::Create(V, EV.getIndices().slice(1));
- else
- return ReplaceInstUsesWith(EV, V);
+ if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) {
+ if (EV.getNumIndices() == 0)
+ return ReplaceInstUsesWith(EV, C2);
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
}
return 0; // Can't handle other constants
- }
+ }
+
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
// We're extracting from an insertvalue instruction, compare the indices
const unsigned *exti, *exte, *insi, *inse;
@@ -1414,7 +1417,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
enum Personality_Type {
Unknown_Personality,
GNU_Ada_Personality,
- GNU_CXX_Personality
+ GNU_CXX_Personality,
+ GNU_ObjC_Personality
};
/// RecognizePersonality - See if the given exception handling personality
@@ -1426,7 +1430,8 @@ static Personality_Type RecognizePersonality(Value *Pers) {
return Unknown_Personality;
return StringSwitch<Personality_Type>(F->getName())
.Case("__gnat_eh_personality", GNU_Ada_Personality)
- .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__objc_personality_v0", GNU_ObjC_Personality)
.Default(Unknown_Personality);
}
@@ -1440,6 +1445,7 @@ static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
// match foreign exceptions (or didn't, before gcc-4.7).
return false;
case GNU_CXX_Personality:
+ case GNU_ObjC_Personality:
return TypeInfo->isNullValue();
}
llvm_unreachable("Unknown personality!");
@@ -1795,7 +1801,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
@@ -1822,7 +1829,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -1840,7 +1847,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD);
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -1867,15 +1874,16 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
// See if this is an explicit destination.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if (SI->getCaseValue(i) == Cond) {
- BasicBlock *ReachableBB = SI->getSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
Worklist.push_back(ReachableBB);
continue;
}
// Otherwise it is the default destination.
- Worklist.push_back(SI->getSuccessor(0));
+ Worklist.push_back(SI->getDefaultDest());
continue;
}
}
@@ -1899,14 +1907,15 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getNameStr() << "\n");
+ << F.getName() << "\n");
{
// Do a depth-first traversal of the function, populate the worklist with
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -1951,7 +1960,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2059,7 +2068,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
-
+ TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt
new file mode 100644
index 000000000000..62c616160cfa
--- /dev/null
+++ b/lib/Transforms/InstCombine/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/InstCombine/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = InstCombine
+parent = Transforms
+required_libraries = Analysis Core Support Target TransformUtils
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
new file mode 100644
index 000000000000..b43b9e5facee
--- /dev/null
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -0,0 +1,937 @@
+//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+// Details of the algorithm:
+// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asan"
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+#include <string>
+#include <algorithm>
+
+using namespace llvm;
+
+static const uint64_t kDefaultShadowScale = 3;
+static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+
+static const size_t kMaxStackMallocSize = 1 << 16; // 64K
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
+static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
+static const char *kAsanReportErrorTemplate = "__asan_report_";
+static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanInitName = "__asan_init";
+static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
+static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
+static const char *kAsanMappingScaleName = "__asan_mapping_scale";
+static const char *kAsanStackMallocName = "__asan_stack_malloc";
+static const char *kAsanStackFreeName = "__asan_stack_free";
+
+static const int kAsanStackLeftRedzoneMagic = 0xf1;
+static const int kAsanStackMidRedzoneMagic = 0xf2;
+static const int kAsanStackRightRedzoneMagic = 0xf3;
+static const int kAsanStackPartialRedzoneMagic = 0xf4;
+
+// Command-line flags.
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+ cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
+ cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack",
+ cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-use-after-return.
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+ cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+ cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClMemIntrin("asan-memintrin",
+ cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -fasan-blacklist.
+static cl::opt<std::string> ClBlackListFile("asan-blacklist",
+ cl::desc("File containing the list of functions to ignore "
+ "during instrumentation"), cl::Hidden);
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+// Shadow = (Mem >> scale) + (1 << offset_log)
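+// For example, with the default 64-bit parameters (scale 3, offset 1 << 44)
+// the byte at address 0x00007fff8000 is shadowed by the byte at
+// (0x00007fff8000 >> 3) + (1 << 44) == 0x10000ffff000.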
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+ cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
+ cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+static cl::opt<bool> ClOpt("asan-opt",
+ cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
+ cl::desc("Instrument the same temp just once"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+ cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+
+// Debug flags.
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+ cl::init(0));
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+ cl::Hidden, cl::init(0));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func",
+ cl::Hidden, cl::desc("Debug func"));
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+ cl::Hidden, cl::init(-1));
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
+ cl::Hidden, cl::init(-1));
+
+namespace {
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer : public ModulePass {
+ AddressSanitizer();
+ virtual const char *getPassName() const;
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
+ Value *Addr, uint32_t TypeSize, bool IsWrite);
+ Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr,
+ bool IsWrite, uint32_t TypeSize);
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
+ Value *Size,
+ Instruction *InsertBefore, bool IsWrite);
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ bool handleFunction(Module &M, Function &F);
+ bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ bool poisonStackInFunction(Module &M, Function &F);
+ virtual bool runOnModule(Module &M);
+ bool insertGlobalRedzones(Module &M);
+ BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp);
+ static char ID; // Pass identification, replacement for typeid
+
+ private:
+
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
+ return SizeInBytes;
+ }
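+ // getAlignedSize rounds a size up to a multiple of RedzoneSize, e.g. a
+ // 40-byte allocation becomes 64 bytes with the default 32-byte redzone.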
+ uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ return ((SizeInBytes + RedzoneSize - 1)
+ / RedzoneSize) * RedzoneSize;
+ }
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ return getAlignedSize(SizeInBytes);
+ }
+
+ void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison);
+ bool LooksLikeCodeInBug11395(Instruction *I);
+
+ Module *CurrentModule;
+ LLVMContext *C;
+ TargetData *TD;
+ uint64_t MappingOffset;
+ int MappingScale;
+ size_t RedzoneSize;
+ int LongSize;
+ Type *IntptrTy;
+ Type *IntptrPtrTy;
+ Function *AsanCtorFunction;
+ Function *AsanInitFunction;
+ Instruction *CtorInsertBefore;
+ OwningPtr<FunctionBlackList> BL;
+};
+} // namespace
+
+char AddressSanitizer::ID = 0;
+INITIALIZE_PASS(AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
+ false, false)
+AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
+ModulePass *llvm::createAddressSanitizerPass() {
+ return new AddressSanitizer();
+}
+
+const char *AddressSanitizer::getPassName() const {
+ return "AddressSanitizer";
+}
+
+// Create a constant for Str so that we can pass it to the run-time lib.
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+// Split the basic block and insert an if-then code.
+// Before:
+// Head
+// SplitBefore
+// Tail
+// After:
+// Head
+// if (Cmp)
+// NewBasicBlock
+// SplitBefore
+// Tail
+//
+// Returns the NewBasicBlock's terminator.
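+// The returned branch is used by the callers as the insertion point for the
+// slow-path instrumentation code.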
+BranchInst *AddressSanitizer::splitBlockAndInsertIfThen(
+ Instruction *SplitBefore, Value *Cmp) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ BasicBlock *NewBasicBlock =
+ BasicBlock::Create(*C, "", Head->getParent());
+ BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock,
+ /*ifFalse*/Tail,
+ Cmp);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+ BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock);
+ return CheckTerm;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+ // Shadow >> scale
+ Shadow = IRB.CreateLShr(Shadow, MappingScale);
+ if (MappingOffset == 0)
+ return Shadow;
+ // (Shadow >> scale) | offset
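+ // For the default 32- and 64-bit mappings the shifted address is always
+ // smaller than the (power-of-two) offset, so the OR computes the same
+ // value as an ADD.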
+ return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
+ MappingOffset));
+}
+
+void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
+ Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
+ // Check the first byte.
+ {
+ IRBuilder<> IRB(InsertBefore);
+ instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
+ }
+ // Check the last byte.
+ {
+ IRBuilder<> IRB(InsertBefore);
+ Value *SizeMinusOne = IRB.CreateSub(
+ Size, ConstantInt::get(Size->getType(), 1));
+ SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrPlusSizeMinusOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
+ instrumentAddress(OrigIns, IRB, AddrPlusSizeMinusOne, 8, IsWrite);
+ }
+}
+
+// Instrument memset/memmove/memcpy
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ Value *Dst = MI->getDest();
+ MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
+ Value *Src = MemTran ? MemTran->getSource() : NULL;
+ Value *Length = MI->getLength();
+
+ Constant *ConstLength = dyn_cast<Constant>(Length);
+ Instruction *InsertBefore = MI;
+ if (ConstLength) {
+ if (ConstLength->isNullValue()) return false;
+ } else {
+ // The size is not a constant so it could be zero -- check at run-time.
+ IRBuilder<> IRB(InsertBefore);
+
+ Value *Cmp = IRB.CreateICmpNE(Length,
+ Constant::getNullValue(Length->getType()));
+ InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp);
+ }
+
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
+ if (Src)
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
+ return true;
+}
+
+static Value *getLDSTOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ return LI->getPointerOperand();
+ }
+ return cast<StoreInst>(*I).getPointerOperand();
+}
+
+void AddressSanitizer::instrumentMop(Instruction *I) {
+ int IsWrite = isa<StoreInst>(*I);
+ Value *Addr = getLDSTOperand(I);
+ if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
+ // We are accessing a global scalar variable. Nothing to catch here.
+ return;
+ }
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ // Ignore all unusual sizes.
+ return;
+ }
+
+ IRBuilder<> IRB(I);
+ instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+}
+
+Instruction *AddressSanitizer::generateCrashCode(
+ IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {
+ // IsWrite and TypeSize are encoded in the function name.
+ std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ (IsWrite ? "store" : "load") + itostr(TypeSize / 8);
+ Value *ReportWarningFunc = CurrentModule->getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IntptrTy, NULL);
+ CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr);
+ Call->setDoesNotReturn();
+ return Call;
+}
+
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+ IRBuilder<> &IRB, Value *Addr,
+ uint32_t TypeSize, bool IsWrite) {
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+ Type *ShadowTy = IntegerType::get(
+ *C, std::max(8U, TypeSize >> MappingScale));
+ Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
+ Value *CmpVal = Constant::getNullValue(ShadowTy);
+ Value *ShadowValue = IRB.CreateLoad(
+ IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+ Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+
+ Instruction *CheckTerm = splitBlockAndInsertIfThen(
+ cast<Instruction>(Cmp)->getNextNode(), Cmp);
+ IRBuilder<> IRB2(CheckTerm);
+
+ size_t Granularity = 1 << MappingScale;
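+ // A shadow byte value of k (0 < k < Granularity) means that only the first
+ // k bytes of the corresponding granule are addressable, so for accesses
+ // narrower than a granule compare the offset of the last accessed byte
+ // against the shadow value.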
+ if (TypeSize < 8 * Granularity) {
+ // Addr & (Granularity - 1)
+ Value *Lower3Bits = IRB2.CreateAnd(
+ AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ // (Addr & (Granularity - 1)) + size - 1
+ Value *LastAccessedByte = IRB2.CreateAdd(
+ Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+ LastAccessedByte = IRB2.CreateIntCast(
+ LastAccessedByte, IRB.getInt8Ty(), false);
+ // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+ Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
+
+ CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2);
+ }
+
+ IRBuilder<> IRB1(CheckTerm);
+ Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+ ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
+}
+
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+ SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+ for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
+ E = M.getGlobalList().end(); G != E; ++G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G);
+
+ if (!Ty->isSized()) continue;
+ if (!G->hasInitializer()) continue;
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ continue;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ continue;
+ // For now, just ignore this global if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) continue;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ continue;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ continue;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ continue;
+ }
+ }
+
+ GlobalsToChange.push_back(G);
+ }
+
+ size_t n = GlobalsToChange.size();
+ if (n == 0) return false;
+
+ // A global is described by a structure
+ // size_t beg;
+ // size_t size;
+ // size_t size_with_redzone;
+ // const char *name;
+ // We initialize an array of such structures and pass it to a run-time call.
+ StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n);
+
+ IRBuilder<> IRB(CtorInsertBefore);
+
+ for (size_t i = 0; i < n; i++) {
+ GlobalVariable *G = GlobalsToChange[i];
+ PointerType *PtrTy = cast<PointerType>(G->getType());
+ Type *Ty = PtrTy->getElementType();
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
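+ // Pad the global up to the next RedzoneSize boundary and then add one full
+ // redzone, e.g. a 40-byte global with the default 32-byte redzone grows to
+ // 96 bytes (40 + 24 bytes of padding + 32-byte redzone).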
+ uint64_t RightRedzoneSize = RedzoneSize +
+ (RedzoneSize - (SizeInBytes % RedzoneSize));
+ Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+
+ StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
+ Constant *NewInitializer = ConstantStruct::get(
+ NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), NULL);
+
+ SmallString<2048> DescriptionOfGlobal = G->getName();
+ DescriptionOfGlobal += " (";
+ DescriptionOfGlobal += M.getModuleIdentifier();
+ DescriptionOfGlobal += ")";
+ GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+
+ // Create a new global variable with enough space for a redzone.
+ GlobalVariable *NewGlobal = new GlobalVariable(
+ M, NewTy, G->isConstant(), G->getLinkage(),
+ NewInitializer, "", G, G->isThreadLocal());
+ NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setAlignment(RedzoneSize);
+
+ Value *Indices2[2];
+ Indices2[0] = IRB.getInt32(0);
+ Indices2[1] = IRB.getInt32(0);
+
+ G->replaceAllUsesWith(
+ ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ NewGlobal->takeName(G);
+ G->eraseFromParent();
+
+ Initializers[i] = ConstantStruct::get(
+ GlobalStructTy,
+ ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+ ConstantInt::get(IntptrTy, SizeInBytes),
+ ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+ ConstantExpr::getPointerCast(Name, IntptrTy),
+ NULL);
+ DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+ }
+
+ ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
+ GlobalVariable *AllGlobals = new GlobalVariable(
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
+ ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+
+ Function *AsanRegisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB.CreateCall2(AsanRegisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+
+ // We also need to unregister globals at the end, e.g. when a shared library
+ // gets closed.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
+ DEBUG(dbgs() << M);
+ return true;
+}
+
+// virtual
+bool AddressSanitizer::runOnModule(Module &M) {
+ // Initialize the private fields. No one has accessed them before.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD)
+ return false;
+ BL.reset(new FunctionBlackList(ClBlackListFile));
+
+ CurrentModule = &M;
+ C = &(M.getContext());
+ LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ IntptrPtrTy = PointerType::get(IntptrTy, 0);
+
+ AsanCtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+ BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+ CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
+
+ // call __asan_init in the module ctor.
+ IRBuilder<> IRB(CtorInsertBefore);
+ AsanInitFunction = cast<Function>(
+ M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+ AsanInitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(AsanInitFunction);
+
+ MappingOffset = LongSize == 32
+ ? kDefaultShadowOffset32 : kDefaultShadowOffset64;
+ if (ClMappingOffsetLog >= 0) {
+ if (ClMappingOffsetLog == 0) {
+ // special case
+ MappingOffset = 0;
+ } else {
+ MappingOffset = 1ULL << ClMappingOffsetLog;
+ }
+ }
+ MappingScale = kDefaultShadowScale;
+ if (ClMappingScale) {
+ MappingScale = ClMappingScale;
+ }
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ RedzoneSize = std::max(32, (int)(1 << MappingScale));
+
+ bool Res = false;
+
+ if (ClGlobals)
+ Res |= insertGlobalRedzones(M);
+
+ if (ClMappingOffsetLog >= 0) {
+ // Tell the run-time the current values of mapping offset and scale.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingOffset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+ }
+ if (ClMappingScale) {
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingScale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+ }
+
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ Res |= handleFunction(M, *F);
+ }
+
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+
+ return Res;
+}
+
+bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
+ // For each NSObject descendant having a +load method, this method is invoked
+ // by the ObjC runtime before any of the static constructors is called.
+ // Therefore we need to instrument such methods with a call to __asan_init
+ // at the beginning in order to initialize our runtime before any access to
+ // the shadow memory.
+ // We cannot just ignore these methods, because they may call other
+ // instrumented functions.
+ if (F.getName().find(" load]") != std::string::npos) {
+ IRBuilder<> IRB(F.begin()->begin());
+ IRB.CreateCall(AsanInitFunction);
+ return true;
+ }
+ return false;
+}
+
+bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+ if (BL->isIn(F)) return false;
+ if (&F == AsanCtorFunction) return false;
+
+ // If needed, insert __asan_init before checking for AddressSafety attr.
+ maybeInsertAsanInitAtFunctionEntry(F);
+
+ if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
+
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
+ return false;
+ // We want to instrument every address only once per basic block
+ // (unless there are calls between uses).
+ SmallSet<Value*, 16> TempsToInstrument;
+ SmallVector<Instruction*, 16> ToInstrument;
+ SmallVector<Instruction*, 8> NoReturnCalls;
+
+ // Fill the set of memory operations to instrument.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ TempsToInstrument.clear();
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (LooksLikeCodeInBug11395(BI)) return false;
+ if ((isa<LoadInst>(BI) && ClInstrumentReads) ||
+ (isa<StoreInst>(BI) && ClInstrumentWrites)) {
+ Value *Addr = getLDSTOperand(BI);
+ if (ClOpt && ClOptSameTemp) {
+ if (!TempsToInstrument.insert(Addr))
+ continue; // We've seen this temp in the current BB.
+ }
+ } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+ // ok, take it.
+ } else {
+ if (CallInst *CI = dyn_cast<CallInst>(BI)) {
+ // A call inside BB.
+ TempsToInstrument.clear();
+ if (CI->doesNotReturn()) {
+ NoReturnCalls.push_back(CI);
+ }
+ }
+ continue;
+ }
+ ToInstrument.push_back(BI);
+ }
+ }
+
+ // Instrument.
+ int NumInstrumented = 0;
+ for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
+ Instruction *Inst = ToInstrument[i];
+ if (ClDebugMin < 0 || ClDebugMax < 0 ||
+ (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+ if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst))
+ instrumentMop(Inst);
+ else
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ }
+ NumInstrumented++;
+ }
+
+ DEBUG(dbgs() << F);
+
+ bool ChangedStack = poisonStackInFunction(M, F);
+
+ // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+ // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
+ for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
+ Instruction *CI = NoReturnCalls[i];
+ IRBuilder<> IRB(CI);
+ IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
+ IRB.getVoidTy(), NULL));
+ }
+
+ return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+}
+
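+// Replicate PoisonByte across one shadow redzone word, e.g.
+// ValueForPoison(0xf1, 4) == 0xf1f1f1f1.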
+static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
+ if (ShadowRedzoneSize == 1) return PoisonByte;
+ if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte;
+ if (ShadowRedzoneSize == 4)
+ return (PoisonByte << 24) + (PoisonByte << 16) +
+ (PoisonByte << 8) + (PoisonByte);
+ llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4");
+}
+
+static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
+ size_t Size,
+ size_t RedzoneSize,
+ size_t ShadowGranularity,
+ uint8_t Magic) {
+ for (size_t i = 0; i < RedzoneSize;
+ i+= ShadowGranularity, Shadow++) {
+ if (i + ShadowGranularity <= Size) {
+ *Shadow = 0; // fully addressable
+ } else if (i >= Size) {
+ *Shadow = Magic; // unaddressable
+ } else {
+ *Shadow = Size - i; // first Size-i bytes are addressable
+ }
+ }
+}
+
+void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
+ IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison) {
+ size_t ShadowRZSize = RedzoneSize >> MappingScale;
+ assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
+ Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
+ Type *RZPtrTy = PointerType::get(RZTy, 0);
+
+ Value *PoisonLeft = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonMid = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonRight = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
+
+ // poison the first red zone.
+ IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
+
+ // poison all other red zones.
+ uint64_t Pos = RedzoneSize;
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert(AlignedSize - SizeInBytes < RedzoneSize);
+ Value *Ptr = NULL;
+
+ Pos += AlignedSize;
+
+ assert(ShadowBase->getType() == IntptrTy);
+ if (SizeInBytes < AlignedSize) {
+ // Poison the partial redzone at right
+ Ptr = IRB.CreateAdd(
+ ShadowBase, ConstantInt::get(IntptrTy,
+ (Pos >> MappingScale) - ShadowRZSize));
+ size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes);
+ uint32_t Poison = 0;
+ if (DoPoison) {
+ PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
+ RedzoneSize,
+ 1ULL << MappingScale,
+ kAsanStackPartialRedzoneMagic);
+ }
+ Value *PartialPoison = ConstantInt::get(RZTy, Poison);
+ IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+ }
+
+ // Poison the full redzone at right.
+ Ptr = IRB.CreateAdd(ShadowBase,
+ ConstantInt::get(IntptrTy, Pos >> MappingScale));
+ Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid;
+ IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+
+ Pos += RedzoneSize;
+ }
+}
+
+// Workaround for bug 11395: we don't want to instrument stack in functions
+// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
+// FIXME: remove once the bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+ if (LongSize != 32) return false;
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CI->isInlineAsm()) return false;
+ if (CI->getNumArgOperands() <= 5) return false;
+ // We have inline assembly with quite a few arguments.
+ return true;
+}
+
+// Find all static Alloca instructions and put
+// poisoned red zones around all of them.
+// Then unpoison everything back before the function returns.
+//
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This however does not work inside the
+// actual function which catches the exception. Most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existing bug on 453.povray.
+// It sounds like an LLVM bug.
+bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+ if (!ClStack) return false;
+ SmallVector<AllocaInst*, 16> AllocaVec;
+ SmallVector<Instruction*, 8> RetVec;
+ uint64_t TotalSize = 0;
+
+ // Filter out Alloca instructions we want (and can) handle.
+ // Collect Ret instructions.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ BasicBlock &BB = *FI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+ BI != BE; ++BI) {
+ if (isa<ReturnInst>(BI)) {
+ RetVec.push_back(BI);
+ continue;
+ }
+
+ AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI) continue;
+ if (AI->isArrayAllocation()) continue;
+ if (!AI->isStaticAlloca()) continue;
+ if (!AI->getAllocatedType()->isSized()) continue;
+ if (AI->getAlignment() > RedzoneSize) continue;
+ AllocaVec.push_back(AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ TotalSize += AlignedSize;
+ }
+ }
+
+ if (AllocaVec.empty()) return false;
+
+ uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
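+ // The frame interleaves redzones and allocas:
+ // [RZ][A0][RZ][A1]...[RZ][An][RZ]; e.g. two allocas of aligned sizes 32 and
+ // 64 with the default 32-byte redzone give a 192-byte frame.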
+
+ bool DoStackMalloc = ClUseAfterReturn
+ && LocalStackSize <= kMaxStackMallocSize;
+
+ Instruction *InsBefore = AllocaVec[0];
+ IRBuilder<> IRB(InsBefore);
+
+
+ Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
+ AllocaInst *MyAlloca =
+ new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+ MyAlloca->setAlignment(RedzoneSize);
+ assert(MyAlloca->isStaticAlloca());
+ Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
+ Value *LocalStackBase = OrigStackBase;
+
+ if (DoStackMalloc) {
+ Value *AsanStackMallocFunc = M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
+ LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ }
+
+ // This string will be parsed by the run-time (DescribeStackAddress).
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+
+ uint64_t Pos = RedzoneSize;
+ // Replace Alloca instructions with base+offset.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ StringRef Name = AI->getName();
+ StackDescription << Pos << " " << SizeInBytes << " "
+ << Name.size() << " " << Name << " ";
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert((AlignedSize % RedzoneSize) == 0);
+ AI->replaceAllUsesWith(
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
+ AI->getType()));
+ Pos += AlignedSize + RedzoneSize;
+ }
+ assert(Pos == LocalStackSize);
+
+ // Write the Magic value and the frame description constant to the redzone.
+ Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
+ IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
+ BasePlus0);
+ Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, LongSize/8));
+ BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+ Value *Description = IRB.CreatePointerCast(
+ createPrivateGlobalForString(M, StackDescription.str()),
+ IntptrTy);
+ IRB.CreateStore(Description, BasePlus1);
+
+ // Poison the stack redzones at the entry.
+ Value *ShadowBase = memToShadow(LocalStackBase, IRB);
+ PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
+
+ Value *AsanStackFreeFunc = NULL;
+ if (DoStackMalloc) {
+ AsanStackFreeFunc = M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL);
+ }
+
+ // Unpoison the stack before all ret instructions.
+ for (size_t i = 0, n = RetVec.size(); i < n; i++) {
+ Instruction *Ret = RetVec[i];
+ IRBuilder<> IRBRet(Ret);
+
+ // Mark the current frame as retired.
+ IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
+ BasePlus0);
+ // Unpoison the stack.
+ PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
+
+ if (DoStackMalloc) {
+ IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ }
+ }
+
+ if (ClDebugStack) {
+ DEBUG(dbgs() << F);
+ }
+
+ return true;
+}
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 7b3a927a4e68..e4c8cf105cee 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,15 +1,11 @@
add_llvm_library(LLVMInstrumentation
+ AddressSanitizer.cpp
EdgeProfiling.cpp
+ FunctionBlackList.cpp
GCOVProfiling.cpp
Instrumentation.cpp
OptimalEdgeProfiling.cpp
PathProfiling.cpp
ProfilingUtils.cpp
- )
-
-add_llvm_library_dependencies(LLVMInstrumentation
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
+ ThreadSanitizer.cpp
)
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
new file mode 100644
index 000000000000..188ea4d9b3cb
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
@@ -0,0 +1,79 @@
+//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on a
+// user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+FunctionBlackList::FunctionBlackList(const std::string &Path) {
+ Functions = NULL;
+ const char *kFunPrefix = "fun:";
+ if (!Path.size()) return;
+ std::string Fun;
+
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
+ report_fatal_error("Can't open blacklist file " + Path + ": " +
+ EC.message());
+ }
+ MemoryBuffer *Buff = File.take();
+ const char *Data = Buff->getBufferStart();
+ size_t DataLen = Buff->getBufferSize();
+ SmallVector<StringRef, 16> Lines;
+ SplitString(StringRef(Data, DataLen), Lines, "\n\r");
+ for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
+ if (Lines[i].startswith(kFunPrefix)) {
+ std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
+ std::string ThisFuncRE;
+ // add ThisFunc replacing * with .*
+ for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
+ if (ThisFunc[j] == '*')
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
+ }
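+ // e.g. the blacklist line "fun:*FooBar*" becomes the regex ".*FooBar.*".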
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
+ }
+ }
+ if (Fun.size()) {
+ Functions = new Regex(Fun);
+ }
+}
+
+bool FunctionBlackList::isIn(const Function &F) {
+ if (Functions) {
+ bool Res = Functions->match(F.getName());
+ return Res;
+ }
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
new file mode 100644
index 000000000000..c1239b9b7e0d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.h
@@ -0,0 +1,37 @@
+//===-- FunctionBlackList.h - blacklist of functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include <string>
+
+namespace llvm {
+class Function;
+class Regex;
+
+// Blacklisted functions are not instrumented.
+// The blacklist file contains one or more lines like this:
+// ---
+// fun:FunctionWildCard
+// ---
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+class FunctionBlackList {
+ public:
+ FunctionBlackList(const std::string &Path);
+ bool isIn(const Function &F);
+ private:
+ Regex *Functions;
+};
+
+} // namespace llvm
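
A minimal standalone sketch of the blacklist handling introduced above: the same "fun:" parsing and '*' -> '.*' translation, but written against the C++ standard library so it compiles on its own. std::regex stands in for llvm::Regex (note that llvm::Regex::match is a substring search, while std::regex_match below requires a full match), and the blacklist contents are invented for illustration.

    #include <iostream>
    #include <regex>
    #include <sstream>
    #include <string>

    int main() {
      // Example blacklist: each "fun:" line names a function wildcard.
      std::string Blacklist = "fun:main\nfun:*_ZN4base*\n";
      std::istringstream In(Blacklist);
      std::string Line, Combined;
      while (std::getline(In, Line)) {
        if (Line.compare(0, 4, "fun:") != 0)
          continue;
        std::string RE;
        for (char C : Line.substr(4)) {
          if (C == '*')
            RE += '.';          // '*' becomes '.*'; everything else is literal
          RE += C;
        }
        if (!Combined.empty())
          Combined += "|";      // alternation over all blacklisted wildcards
        Combined += RE;
      }
      std::regex Functions(Combined);
      std::cout << std::regex_match("main", Functions) << "\n";    // 1: blacklisted
      std::cout << std::regex_match("helper", Functions) << "\n";  // 0: instrumented
      return 0;
    }
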
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ccf7e1109cd9..96e5d5b31140 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -43,12 +43,14 @@ namespace {
public:
static char ID;
GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+ UseExtraChecksum(false) {
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+ bool useExtraChecksum = false)
: ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format) {
+ Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
@@ -94,6 +96,7 @@ namespace {
bool EmitNotes;
bool EmitData;
bool Use402Format;
+ bool UseExtraChecksum;
Module *M;
LLVMContext *Ctx;
@@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+ bool Use402Format,
+ bool UseExtraChecksum) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
}
namespace {
@@ -167,7 +171,7 @@ namespace {
}
uint32_t length() {
- // Here 2 = 1 for string lenght + 1 for '0' id#.
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -244,10 +248,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os,
+ bool Use402Format, bool UseExtraChecksum) {
this->os = os;
Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
@@ -257,14 +263,14 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (!Use402Format)
- ++BlockLen; // For second checksum.
+ if (UseExtraChecksum)
+ ++BlockLen;
write(BlockLen);
uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
- write(0); // checksum #1
- if (!Use402Format)
- write(0); // checksum #2
+ write(0); // lineno checksum
+ if (UseExtraChecksum)
+ write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
@@ -290,6 +296,7 @@ namespace {
for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
write(0); // No flags on our blocks.
}
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
@@ -301,6 +308,8 @@ namespace {
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
write(Block.OutEdges[i]->Number);
write(0); // no flags
}
@@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
void GCOVProfiler::emitGCNO() {
- DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(CU_Nodes->getOperand(i));
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*204MVLL", 12);
-
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
- raw_fd_ostream *&os = GcnoFiles[CU];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out.write("oncg*404MVLL", 12);
+ else
+ out.write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
}
- Func.writeOut();
}
+ Func.writeOut();
}
- }
-
- for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
- I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
- raw_fd_ostream *&out = I->second;
- out->write("\0\0\0\0\0\0\0\0", 8); // EOF
- out->close();
- delete out;
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
}
}
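
A short usage sketch for the extended entry point above. The only signature taken from this patch is createGCOVProfilerPass(EmitNotes, EmitData, Use402Format, UseExtraChecksum); the pass-manager plumbing assumes the LLVM 3.1-era C++ API and header paths.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Instrumentation.h"
    using namespace llvm;

    // Run GCOV instrumentation over a module with the new extra-checksum
    // word enabled (EmitNotes and EmitData on, 4.0.2 format off).
    void addGCOVInstrumentation(Module &M) {
      PassManager PM;
      PM.add(createGCOVProfilerPass(true, true, /*Use402Format=*/false,
                                    /*UseExtraChecksum=*/true));
      PM.run(M);
    }
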
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 71adc1ec6de0..c7266e2f8de6 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,6 +24,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeOptimalEdgeProfilerPass(Registry);
initializePathProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
+ initializeAddressSanitizerPass(Registry);
+ initializeThreadSanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
new file mode 100644
index 000000000000..d36ad540ee80
--- /dev/null
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/Instrumentation/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Instrumentation
+parent = Transforms
+required_libraries = Analysis Core Support TransformUtils
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 62c21b8e9c59..1fe12545d294 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -69,7 +69,7 @@ inline static void printEdgeCounter(ProfileInfo::Edge e,
BasicBlock* b,
unsigned i) {
DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
- << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+ << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
}
bool OptimalEdgeProfiler::runOnModule(Module &M) {
@@ -127,7 +127,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
unsigned i = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
// Calculate a Maximum Spanning Tree with the edge weights determined by
// ProfileEstimator. ProfileEstimator also assign weights to the virtual
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 23915d39f214..b2147968dfac 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -665,7 +665,7 @@ void BLInstrumentationDag::unlinkPhony() {
// Generate a .dot graph to represent the DAG and pathNumbers
void BLInstrumentationDag::generateDotGraph() {
std::string errorInfo;
- std::string functionName = getFunction().getNameStr();
+ std::string functionName = getFunction().getName().str();
std::string filename = "pathdag." + functionName + ".dot";
DEBUG (dbgs() << "Writing '" << filename << "'...\n");
@@ -750,7 +750,8 @@ Value* BLInstrumentationNode::getStartingPathNumber(){
// Sets the Value of the pathNumber. Used by the instrumentation code.
void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
- pathNumber->getNameStr() : "unused") << "\n");
+ pathNumber->getName() :
+ "unused") << "\n");
_startingPathNumber = pathNumber;
}
@@ -760,7 +761,7 @@ Value* BLInstrumentationNode::getEndingPathNumber(){
void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
DEBUG(dbgs() << " EPN-" << getName() << " <-- "
- << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ << (pathNumber ? pathNumber->getName() : "unused") << "\n");
_endingPathNumber = pathNumber;
}
@@ -1239,9 +1240,9 @@ void PathProfiler::insertInstrumentation(
insertPoint++;
DEBUG(dbgs() << "\nInstrumenting method call block '"
- << node->getBlock()->getNameStr() << "'\n");
+ << node->getBlock()->getName() << "'\n");
DEBUG(dbgs() << " Path number initialized: "
- << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
Value* newpn;
if( node->getStartingPathNumber() ) {
@@ -1370,7 +1371,7 @@ bool PathProfiler::runOnModule(Module &M) {
if (F->isDeclaration())
continue;
- DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
functionNumber++;
// set function number
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
new file mode 100644
index 000000000000..8bb337eb2b05
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -0,0 +1,311 @@
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development; for details about previous versions, see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+// - Insert calls to run-time library before every memory access.
+// - Optimizations may apply to avoid instrumenting some of the accesses.
+// - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tsan"
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
+ cl::desc("Blacklist file"), cl::Hidden);
+
+static cl::opt<bool> ClPrintStats("tsan-print-stats",
+ cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden);
+
+namespace {
+
+// Stats counters for ThreadSanitizer instrumentation.
+struct ThreadSanitizerStats {
+ size_t NumInstrumentedReads;
+ size_t NumInstrumentedWrites;
+ size_t NumOmittedReadsBeforeWrite;
+ size_t NumAccessesWithBadSize;
+ size_t NumInstrumentedVtableWrites;
+ size_t NumOmittedReadsFromConstantGlobals;
+ size_t NumOmittedReadsFromVtable;
+};
+
+/// ThreadSanitizer: instrument the code in a module to find races.
+struct ThreadSanitizer : public FunctionPass {
+ ThreadSanitizer();
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ bool instrumentLoadOrStore(Instruction *I);
+ static char ID; // Pass identification, replacement for typeid.
+
+ private:
+ void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All);
+ bool addrPointsToConstantData(Value *Addr);
+
+ TargetData *TD;
+ OwningPtr<FunctionBlackList> BL;
+ // Callbacks to run-time library are computed in doInitialization.
+ Value *TsanFuncEntry;
+ Value *TsanFuncExit;
+  // Access sizes are powers of two: 1, 2, 4, 8, 16.
+ static const size_t kNumberOfAccessSizes = 5;
+ Value *TsanRead[kNumberOfAccessSizes];
+ Value *TsanWrite[kNumberOfAccessSizes];
+ Value *TsanVptrUpdate;
+
+ // Stats are modified w/o synchronization.
+ ThreadSanitizerStats stats;
+};
+} // namespace
+
+char ThreadSanitizer::ID = 0;
+INITIALIZE_PASS(ThreadSanitizer, "tsan",
+ "ThreadSanitizer: detects data races.",
+ false, false)
+
+ThreadSanitizer::ThreadSanitizer()
+ : FunctionPass(ID),
+ TD(NULL) {
+}
+
+FunctionPass *llvm::createThreadSanitizerPass() {
+ return new ThreadSanitizer();
+}
+
+bool ThreadSanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD)
+ return false;
+ BL.reset(new FunctionBlackList(ClBlackListFile));
+ memset(&stats, 0, sizeof(stats));
+
+ // Always insert a call to __tsan_init into the module's CTORs.
+ IRBuilder<> IRB(M.getContext());
+ Value *TsanInit = M.getOrInsertFunction("__tsan_init",
+ IRB.getVoidTy(), NULL);
+ appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+
+ // Initialize the callbacks.
+ TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(),
+ NULL);
+ for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+ SmallString<32> ReadName("__tsan_read");
+ ReadName += itostr(1 << i);
+ TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ SmallString<32> WriteName("__tsan_write");
+ WriteName += itostr(1 << i);
+ TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ }
+ TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ NULL);
+ return true;
+}
+
+bool ThreadSanitizer::doFinalization(Module &M) {
+ if (ClPrintStats) {
+ errs() << "ThreadSanitizerStats " << M.getModuleIdentifier()
+ << ": wr " << stats.NumInstrumentedWrites
+ << "; rd " << stats.NumInstrumentedReads
+ << "; vt " << stats.NumInstrumentedVtableWrites
+ << "; bs " << stats.NumAccessesWithBadSize
+ << "; rbw " << stats.NumOmittedReadsBeforeWrite
+ << "; rcg " << stats.NumOmittedReadsFromConstantGlobals
+ << "; rvt " << stats.NumOmittedReadsFromVtable
+ << "\n";
+ }
+ return true;
+}
+
+static bool isVtableAccess(Instruction *I) {
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
+ if (Tag->getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ }
+ return false;
+}
+
+bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
+ // If this is a GEP, just analyze its pointer operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+ Addr = GEP->getPointerOperand();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->isConstant()) {
+ // Reads from constant globals can not race with any writes.
+ stats.NumOmittedReadsFromConstantGlobals++;
+ return true;
+ }
+ } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ if (isVtableAccess(L)) {
+ // Reads from a vtable pointer can not race with any writes.
+ stats.NumOmittedReadsFromVtable++;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Instrumenting some of the accesses may be proven redundant.
+// Currently handled:
+// - read-before-write (within same BB, no calls between)
+//
+// We do not handle some of the patterns that should not survive
+// after the classic compiler optimizations.
+// E.g. two reads from the same temp should be eliminated by CSE,
+// two writes should be eliminated by DSE, etc.
+//
+// 'Local' is a vector of insns within the same BB (no calls between).
+// 'All' is a vector of insns that will be instrumented.
+void ThreadSanitizer::choseInstructionsToInstrument(
+ SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All) {
+ SmallSet<Value*, 8> WriteTargets;
+ // Iterate from the end.
+ for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
+ E = Local.rend(); It != E; ++It) {
+ Instruction *I = *It;
+ if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
+ WriteTargets.insert(Store->getPointerOperand());
+ } else {
+ LoadInst *Load = cast<LoadInst>(I);
+ Value *Addr = Load->getPointerOperand();
+ if (WriteTargets.count(Addr)) {
+ // We will write to this temp, so no reason to analyze the read.
+ stats.NumOmittedReadsBeforeWrite++;
+ continue;
+ }
+ if (addrPointsToConstantData(Addr)) {
+ // Addr points to some constant data -- it can not race with any writes.
+ continue;
+ }
+ }
+ All.push_back(I);
+ }
+ Local.clear();
+}
+
+bool ThreadSanitizer::runOnFunction(Function &F) {
+ if (!TD) return false;
+ if (BL->isIn(F)) return false;
+ SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> AllLoadsAndStores;
+ SmallVector<Instruction*, 8> LocalLoadsAndStores;
+ bool Res = false;
+ bool HasCalls = false;
+
+ // Traverse all instructions, collect loads/stores/returns, check for calls.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ BasicBlock &BB = *FI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+ BI != BE; ++BI) {
+ if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
+ LocalLoadsAndStores.push_back(BI);
+ else if (isa<ReturnInst>(BI))
+ RetVec.push_back(BI);
+ else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ HasCalls = true;
+ choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+ }
+ choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+
+ // We have collected all loads and stores.
+ // FIXME: many of these accesses do not need to be checked for races
+ // (e.g. variables that do not escape, etc).
+
+ // Instrument memory accesses.
+ for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
+ Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
+ }
+
+ // Instrument function entry/exit points if there were instrumented accesses.
+ if (Res || HasCalls) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ReturnAddress = IRB.CreateCall(
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+ IRB.getInt32(0));
+ IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+ for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
+ IRBuilder<> IRBRet(RetVec[i]);
+ IRBRet.CreateCall(TsanFuncExit);
+ }
+ Res = true;
+ }
+ return Res;
+}
+
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+ IRBuilder<> IRB(I);
+ bool IsWrite = isa<StoreInst>(*I);
+ Value *Addr = IsWrite
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ stats.NumAccessesWithBadSize++;
+ // Ignore all unusual sizes.
+ return false;
+ }
+ if (IsWrite && isVtableAccess(I)) {
+ Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ IRB.CreateCall2(TsanVptrUpdate,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
+ stats.NumInstrumentedVtableWrites++;
+ return true;
+ }
+ size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+ assert(Idx < kNumberOfAccessSizes);
+ Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ if (IsWrite) stats.NumInstrumentedWrites++;
+ else stats.NumInstrumentedReads++;
+ return true;
+}
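
A conceptual sketch of what the pass above inserts, written as C++ rather than as the IR the pass actually emits. The callback names are the ones registered in doInitialization(); the function body and the use of the __builtin_return_address compiler builtin are illustrative only.

    // Runtime callbacks provided by the ThreadSanitizer run-time library.
    extern "C" {
    void __tsan_func_entry(void *ReturnAddr);
    void __tsan_func_exit(void);
    void __tsan_read4(void *Addr);
    void __tsan_write4(void *Addr);
    }

    int Global;

    // Roughly what an instrumented function ends up doing.
    int AddOne(int *P) {
      __tsan_func_entry(__builtin_return_address(0));  // function entry
      __tsan_read4(P);                                 // before the 4-byte load
      int V = *P + 1;
      __tsan_write4(&Global);                          // before the 4-byte store
      Global = V;
      __tsan_func_exit();                              // before returning
      return V;
    }
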
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
new file mode 100644
index 000000000000..f7bca064c7e1
--- /dev/null
+++ b/lib/Transforms/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Transforms/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize
+
+[component_0]
+type = Group
+name = Transforms
+parent = Libraries
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index e527be25decb..8b1df92fa28b 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 79bcae58250f..d660c722c7ca 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMScalarOpts
DCE.cpp
DeadStoreElimination.cpp
EarlyCSE.cpp
+ GlobalMerge.cpp
GVN.cpp
IndVarSimplify.cpp
JumpThreading.cpp
@@ -31,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b217355..9a5423f4e2eb 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -64,11 +65,17 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+// FIXME: Remove this abomination once all of the tests pass without it!
+static cl::opt<bool> DisableDeleteDeadBlocks(
+ "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
+ cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +104,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +124,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +138,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -153,8 +165,22 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ SmallPtrSet<BasicBlock*, 8> WorkList;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
MadeChange |= ConstantFoldTerminator(BB, true);
+ if (!MadeChange) continue;
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ if (!DisableDeleteDeadBlocks)
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
if (MadeChange)
ModifiedDT = true;
@@ -541,8 +567,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -553,6 +579,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return true;
}
+ if (II && TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
+
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
@@ -612,7 +647,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
return false;
@@ -667,7 +702,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
continue;
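
An illustrative source-level example for the new dead-block cleanup above (a sketch of the intent, not a claim about any particular compilation): once ConstantFoldTerminator turns a branch on a known-constant condition into an unconditional branch, the untaken successor loses its last predecessor and is removed through DeleteDeadBlock unless -disable-cgp-delete-dead-blocks is given.

    // After the conditional branch on 'Flag' is folded, the block holding
    // 'return 2;' has no remaining predecessors and can be deleted.
    int PickValue() {
      const bool Flag = true;
      if (Flag)
        return 1;   // the only reachable successor
      return 2;     // becomes predecessor-less once the terminator is folded
    }
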
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6a222f..5430f6253884 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index e275268fc4ea..9b0aadb0b5b0 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -28,6 +28,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
@@ -37,6 +38,7 @@ namespace {
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
+ bool processSwitch(SwitchInst *SI);
public:
static char ID;
@@ -110,7 +112,8 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Changed = true;
}
- ++NumPhis;
+ if (Changed)
+ ++NumPhis;
return Changed;
}
@@ -173,6 +176,86 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
return true;
}
+/// processSwitch - Simplify a switch instruction by removing cases which can
+/// never fire. If the uselessness of a case could be determined locally then
+/// constant propagation would already have figured it out. Instead, walk the
+/// predecessors and statically evaluate cases based on information available
+/// on that edge.  Cases that cannot fire no matter which incoming edge is
+/// taken can safely be removed.  If a case fires on every incoming edge then
+/// the entire switch can be removed and replaced with a branch to the case
+/// destination.
+bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ BasicBlock *BB = SI->getParent();
+
+  // If the condition was defined in the same block as the switch then
+  // LazyValueInfo currently won't say anything useful about it, though in
+  // theory it could.
+ if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
+ return false;
+
+ // If the switch is unreachable then trying to improve it is a waste of time.
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ if (PB == PE) return false;
+
+ // Analyse each switch case in turn. This is done in reverse order so that
+ // removing a case doesn't cause trouble for the iteration.
+ bool Changed = false;
+ for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
+ ) {
+ ConstantInt *Case = CI.getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI, BB);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+ }
+
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ CI.getCaseSuccessor()->removePredecessor(BB);
+ SI->removeCase(CI); // Does not invalidate the iterator.
+ ++NumDeadCases;
+ Changed = true;
+ } else if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
+ break;
+ }
+ }
+
+ if (Changed)
+ // If the switch has been simplified to the point where it can be replaced
+ // by a branch then do so now.
+ ConstantFoldTerminator(BB);
+
+ return Changed;
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
@@ -200,6 +283,13 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
}
}
+ Instruction *Term = FI->getTerminator();
+ switch (Term->getOpcode()) {
+ case Instruction::Switch:
+ BBChanged |= processSwitch(cast<SwitchInst>(Term));
+ break;
+ }
+
FnChanged |= BBChanged;
}
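
An illustrative input for processSwitch above (a source-level sketch; whether the rewrite fires depends on what LazyValueInfo can prove along the incoming edges): on the only edge into the switch the condition is known to equal 0, so the other cases can never fire, the condition is replaced by the case value, and the switch collapses into a branch to the 'case 0' destination.

    int Classify(int X) {
      if (X == 0) {
        // Reached only along the edge on which X == 0 is known.
        switch (X) {
        case 0: return 10;   // fires on every incoming edge
        case 1: return 11;   // can never fire here
        case 2: return 12;   // can never fire here
        default: return -1;
        }
      }
      return 0;
    }
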
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a593d0f44633..c8c53606015a 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -24,6 +24,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -42,25 +44,26 @@ namespace {
struct DSE : public FunctionPass {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0) {
+ DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTree>();
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
- if (DT.isReachableFromEntry(I))
+ if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0;
+ AA = 0; MD = 0; DT = 0;
return Changed;
}
@@ -221,7 +224,7 @@ static bool isRemovable(Instruction *I) {
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
- default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
case Intrinsic::lifetime_end:
// Never remove dead lifetime_end's, e.g. because it is followed by a
// free.
@@ -238,6 +241,24 @@ static bool isRemovable(Instruction *I) {
}
}
+
+/// isShortenable - Returns true if this instruction can be safely shortened in
+/// length.
+static bool isShortenable(Instruction *I) {
+ // Don't shorten stores for now
+ if (isa<StoreInst>(I))
+ return false;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
+}
+
/// getStoredPointerOperand - Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -247,46 +268,61 @@ static Value *getStoredPointerOperand(Instruction *I) {
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
- default: assert(false && "Unexpected intrinsic!");
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return II->getArgOperand(0);
}
}
-static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
const TargetData *TD = AA.getTargetData();
+
+ if (const CallInst *CI = extractMallocCall(V)) {
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
+ return C->getZExtValue();
+ }
+
if (TD == 0)
return AliasAnalysis::UnknownSize;
- if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
- if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- return AliasAnalysis::UnknownSize;
}
- assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- PointerType *PT = cast<PointerType>(V->getType());
- return TD->getTypeAllocSize(PT->getElementType());
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ if (PointerType *PT = dyn_cast<PointerType>(A->getType()))
+ return TD->getTypeAllocSize(PT->getElementType());
+ }
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->mayBeOverridden())
+ return TD->getTypeAllocSize(GV->getType()->getElementType());
+ }
+
+ return AliasAnalysis::UnknownSize;
}
-/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
-/// pointing to an object with a pointer size we can trust.
-static bool isObjectPointerWithTrustworthySize(const Value *V) {
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return !AI->isArrayAllocation();
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->mayBeOverridden();
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
- return false;
+namespace {
+ enum OverwriteResult
+ {
+ OverwriteComplete,
+ OverwriteEnd,
+ OverwriteUnknown
+ };
}
-/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
/// completely overwrites a store to the 'Earlier' location.
-static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
- const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA) {
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA,
+ int64_t &EarlierOff,
+ int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -300,23 +336,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// If we have no TargetData information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getTargetData() == 0)
- return Later.Ptr->getType() == Earlier.Ptr->getType();
- return false;
+ if (AA.getTargetData() == 0 &&
+ Later.Ptr->getType() == Earlier.Ptr->getType())
+ return OverwriteComplete;
+
+ return OverwriteUnknown;
}
// Make sure that the Later size is >= the Earlier size.
- if (Later.Size < Earlier.Size)
- return false;
- return true;
+ if (Later.Size >= Earlier.Size)
+ return OverwriteComplete;
}
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
- Later.Size <= Earlier.Size || AA.getTargetData() == 0)
- return false;
+ AA.getTargetData() == 0)
+ return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
@@ -329,26 +366,25 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
- return false;
+ return OverwriteUnknown;
// If the "Later" store is to a recognizable object, get its size.
- if (isObjectPointerWithTrustworthySize(UO2)) {
- uint64_t ObjectSize =
- TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
- if (ObjectSize == Later.Size)
- return true;
- }
+ uint64_t ObjectSize = getPointerSize(UO2, AA);
+ if (ObjectSize != AliasAnalysis::UnknownSize)
+ if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+ return OverwriteComplete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- int64_t EarlierOff = 0, LaterOff = 0;
+ EarlierOff = 0;
+ LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
- return false;
+ return OverwriteUnknown;
// The later store completely overlaps the earlier store if:
//
@@ -366,11 +402,25 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
+ Later.Size > Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
- return true;
+ return OverwriteComplete;
+
+ // The other interesting case is if the later store overwrites the end of
+ // the earlier store
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // In this case we may want to trim the size of earlier to avoid generating
+  // writes to addresses which will definitely be overwritten later.
+ if (LaterOff > EarlierOff &&
+ LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+ return OverwriteEnd;
// Otherwise, they don't completely overlap.
- return false;
+ return OverwriteUnknown;
}
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
@@ -494,22 +544,52 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
- << *DepWrite << "\n KILLER: " << *Inst << '\n');
-
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
- ++NumFastStores;
- MadeChange = true;
-
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- break;
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+ DepWriteOffset, InstWriteOffset);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+          // TODO: Base this on the target vector size so that if the earlier
+          // store was too small to get vector writes anyway, then it's likely
+          // a good idea to shorten it.
+          // Power-of-2 vector writes are probably always a bad idea to
+          // optimize, as any store/memset/memcpy is likely using vector
+          // instructions, so shortening it to a non-vector size is likely to
+          // be slower.
+ MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ if (llvm::isPowerOf2_64(InstWriteOffset) ||
+ ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", "
+ << DepLoc.Size << ")"
+ << *Inst << '\n');
+
+ Value* DepWriteLength = DepIntrinsic->getLength();
+ Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+ InstWriteOffset -
+ DepWriteOffset);
+ DepIntrinsic->setLength(TrimmedLength);
+ MadeChange = true;
+ }
+ }
}
// If this is a may-aliased store that is clobbering the store value, we
@@ -538,37 +618,67 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
+/// Find all blocks that will unconditionally lead to the block BB and append
+/// them to F.
+static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
+ BasicBlock *BB, DominatorTree *DT) {
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
+ TerminatorInst *PredTI = Pred->getTerminator();
+ if (PredTI->getNumSuccessors() != 1)
+ continue;
+
+ if (DT->isReachableFromEntry(Pred))
+ Blocks.push_back(Pred);
+ }
+}
+
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
bool MadeChange = false;
- MemDepResult Dep = MD->getDependency(F);
+ AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
+ SmallVector<BasicBlock *, 16> Blocks;
+ Blocks.push_back(F->getParent());
- while (Dep.isDef() || Dep.isClobber()) {
- Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
- return MadeChange;
+ while (!Blocks.empty()) {
+ BasicBlock *BB = Blocks.pop_back_val();
+ Instruction *InstPt = BB->getTerminator();
+ if (BB == F->getParent()) InstPt = F;
- Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency));
+ MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ while (Dep.isDef() || Dep.isClobber()) {
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ break;
- // Check for aliasing.
- if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
- return MadeChange;
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
- // DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD);
- ++NumFastStores;
- MadeChange = true;
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ break;
- // Inst's old Dependency is now deleted. Compute the next dependency,
- // which may also be dead, as in
- // s[0] = 0;
- // s[1] = 0; // This has just been deleted.
- // free(s);
- Dep = MD->getDependency(F);
- };
+ Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
+ }
+
+ if (Dep.isNonLocal())
+ FindUnconditionalPreds(Blocks, BB, DT);
+ }
return MadeChange;
}
@@ -588,10 +698,17 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
- for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeadStackObjects.insert(AI);
+ // Okay, so these are dead heap objects, but if the pointer never escapes
+    // then it's leaked by this function anyway.
+ if (CallInst *CI = extractMallocCall(I))
+ if (!PointerMayBeCaptured(CI, true, true))
+ DeadStackObjects.insert(CI);
+ }
+
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
@@ -637,6 +754,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
+ if (CallInst *CI = extractMallocCall(BBI)) {
+ DeadStackObjects.erase(CI);
+ continue;
+ }
+
if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't be loading any of our
// pointers.
@@ -732,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
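
A worked example for the new OverwriteEnd path above (a sketch of the analysis, not actual pass output). Here EarlierOff = 0 and Earlier.Size = 32 for the first memset, while LaterOff = 16 and Later.Size = 16 for the second, so isOverwrite() reports OverwriteEnd; since InstWriteOffset (16) is a power of two, DSE may trim the earlier memset's length to InstWriteOffset - DepWriteOffset = 16 bytes.

    #include <cstring>

    // The first memset covers bytes [0, 32); the second covers [16, 32).
    // The tail of the first write is dead, so its length can shrink to 16.
    void Fill(char *Buf /* caller provides at least 32 bytes */) {
      std::memset(Buf, 0, 32);       // earlier write: its end is overwritten
      std::memset(Buf + 16, 1, 16);  // later write: bytes 16..31
    }
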
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2bf199..f3c92d64c2a3 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,11 +19,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include <deque>
using namespace llvm;
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
@@ -215,6 +217,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -257,12 +260,77 @@ public:
bool runOnFunction(Function &F);
private:
-
+
+ // NodeScope - almost a POD, but needs to call the constructors for the
+ // scoped hash tables so that a new scope gets pushed on. These are RAII so
+ // that the scope gets popped when the NodeScope is destroyed.
+ class NodeScope {
+ public:
+ NodeScope(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls) :
+ Scope(*availableValues),
+ LoadScope(*availableLoads),
+ CallScope(*availableCalls) {}
+
+ private:
+ NodeScope(const NodeScope&); // DO NOT IMPLEMENT
+
+ ScopedHTType::ScopeTy Scope;
+ LoadHTType::ScopeTy LoadScope;
+ CallHTType::ScopeTy CallScope;
+ };
+
+ // StackNode - contains all the needed information to create a stack for
+  // doing a depth first traversal of the tree. This includes scopes for
+  // values, loads, and calls as well as the generation. There is a child
+  // iterator so that the children do not need to be stored separately.
+ class StackNode {
+ public:
+ StackNode(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls,
+ unsigned cg, DomTreeNode *n,
+ DomTreeNode::iterator child, DomTreeNode::iterator end) :
+ CurrentGeneration(cg), ChildGeneration(cg), Node(n),
+ ChildIter(child), EndIter(end),
+ Scopes(availableValues, availableLoads, availableCalls),
+ Processed(false) {}
+
+ // Accessors.
+ unsigned currentGeneration() { return CurrentGeneration; }
+ unsigned childGeneration() { return ChildGeneration; }
+ void childGeneration(unsigned generation) { ChildGeneration = generation; }
+ DomTreeNode *node() { return Node; }
+ DomTreeNode::iterator childIter() { return ChildIter; }
+ DomTreeNode *nextChild() {
+ DomTreeNode *child = *ChildIter;
+ ++ChildIter;
+ return child;
+ }
+ DomTreeNode::iterator end() { return EndIter; }
+ bool isProcessed() { return Processed; }
+ void process() { Processed = true; }
+
+ private:
+ StackNode(const StackNode&); // DO NOT IMPLEMENT
+
+ // Members.
+ unsigned CurrentGeneration;
+ unsigned ChildGeneration;
+ DomTreeNode *Node;
+ DomTreeNode::iterator ChildIter;
+ DomTreeNode::iterator EndIter;
+ NodeScope Scopes;
+ bool Processed;
+ };
+
bool processNode(DomTreeNode *Node);
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,22 +345,10 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
- // Define a scope in the scoped hash table. When we are done processing this
- // domtree node and recurse back up to our parent domtree node, this will pop
- // off all the values we install.
- ScopedHTType::ScopeTy Scope(*AvailableValues);
-
- // Define a scope for the load values so that anything we add will get
- // popped when we recurse back up to our parent domtree node.
- LoadHTType::ScopeTy LoadScope(*AvailableLoads);
-
- // Define a scope for the call values so that anything we add will get
- // popped when we recurse back up to our parent domtree node.
- CallHTType::ScopeTy CallScope(*AvailableCalls);
-
BasicBlock *BB = Node->getBlock();
// If this block has a single predecessor, then the predecessor is the parent
@@ -328,7 +384,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -442,19 +498,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
}
}
-
- unsigned LiveOutGeneration = CurrentGeneration;
- for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
- Changed |= processNode(*I);
- // Pop any generation changes off the stack from the recursive walk.
- CurrentGeneration = LiveOutGeneration;
- }
+
return Changed;
}
bool EarlyCSE::runOnFunction(Function &F) {
+ std::deque<StackNode *> nodesToProcess;
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
@@ -466,5 +519,52 @@ bool EarlyCSE::runOnFunction(Function &F) {
AvailableCalls = &CallTable;
CurrentGeneration = 0;
- return processNode(DT->getRootNode());
+ bool Changed = false;
+
+ // Process the root node.
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
+ CurrentGeneration, DT->getRootNode(),
+ DT->getRootNode()->begin(),
+ DT->getRootNode()->end()));
+
+ // Save the current generation.
+ unsigned LiveOutGeneration = CurrentGeneration;
+
+ // Process the stack.
+ while (!nodesToProcess.empty()) {
+ // Grab the first item off the stack. Set the current generation, remove
+ // the node from the stack, and process it.
+ StackNode *NodeToProcess = nodesToProcess.front();
+
+ // Initialize class members.
+ CurrentGeneration = NodeToProcess->currentGeneration();
+
+ // Check if the node needs to be processed.
+ if (!NodeToProcess->isProcessed()) {
+ // Process the node.
+ Changed |= processNode(NodeToProcess->node());
+ NodeToProcess->childGeneration(CurrentGeneration);
+ NodeToProcess->process();
+ } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+ // Push the next child onto the stack.
+ DomTreeNode *child = NodeToProcess->nextChild();
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues,
+ AvailableLoads,
+ AvailableCalls,
+ NodeToProcess->childGeneration(), child,
+ child->begin(), child->end()));
+ } else {
+ // It has been processed, and there are no more children to process,
+ // so delete it and pop it off the stack.
+ delete NodeToProcess;
+ nodesToProcess.pop_front();
+ }
+ } // while (!nodes...)
+
+ // Reset the current generation.
+ CurrentGeneration = LiveOutGeneration;
+
+ return Changed;
}
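The EarlyCSE change above trades the recursive dominator-tree walk for an explicit stack of StackNode objects whose NodeScope members open and close the hash-table scopes. A minimal sketch of the same pattern, using a hypothetical Node/Frame pair rather than the pass's DomTreeNode/StackNode, might look like:

    #include <vector>

    struct Node {
      std::vector<Node *> Children;
    };

    static void visit(Node *) {} // stand-in for per-node work (e.g. processNode)

    struct Frame {
      explicit Frame(Node *N) : N(N), NextChild(0), Visited(false) {}
      Node *N;            // subtree root; a real frame would also own RAII scopes
      unsigned NextChild; // index of the next child to push
      bool Visited;       // mirrors StackNode::Processed
    };

    // Iterative depth-first walk: a frame is pushed when its node is first seen
    // and popped once every child has been handled, which is exactly when any
    // per-node scope state should be torn down.
    static void walk(Node *Root) {
      std::vector<Frame> Stack;
      Stack.push_back(Frame(Root));
      while (!Stack.empty()) {
        Frame &Top = Stack.back();
        if (!Top.Visited) {
          Top.Visited = true;
          visit(Top.N);                  // pre-order work happens exactly once
        } else if (Top.NextChild < Top.N->Children.size()) {
          Node *Child = Top.N->Children[Top.NextChild++];
          Stack.push_back(Frame(Child)); // descend; Top may now dangle, unused
        } else {
          Stack.pop_back();              // subtree done; scopes would pop here
        }
      }
    }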
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index cbfdbcddaeca..fb733ada5a19 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,10 +31,12 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
@@ -83,6 +85,12 @@ namespace {
return false;
return true;
}
+
+ friend hash_code hash_value(const Expression &Value) {
+ return hash_combine(Value.opcode, Value.type,
+ hash_combine_range(Value.varargs.begin(),
+ Value.varargs.end()));
+ }
};
class ValueTable {
@@ -95,12 +103,17 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
+ Expression create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS);
Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value *V);
uint32_t lookup(Value *V) const;
+ uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS);
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
@@ -124,16 +137,8 @@ template <> struct DenseMapInfo<Expression> {
}
static unsigned getHashValue(const Expression e) {
- unsigned hash = e.opcode;
-
- hash = ((unsigned)((uintptr_t)e.type >> 4) ^
- (unsigned)((uintptr_t)e.type >> 9));
-
- for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
- E = e.varargs.end(); I != E; ++I)
- hash = *I + hash * 37;
-
- return hash;
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(e));
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
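The hunk above replaces the hand-rolled hash with llvm::hash_combine via a hash_value overload. A small sketch of that idiom for an unrelated key type (a hypothetical Key struct, assuming only llvm/ADT/Hashing.h) could be:

    #include "llvm/ADT/Hashing.h"
    #include <vector>

    namespace example {
    struct Key {
      unsigned Opcode;
      std::vector<unsigned> Args;
    };

    // Called (via ADL or an explicit using-declaration) from a DenseMapInfo
    // getHashValue wrapper; mixes the opcode with every argument in one pass.
    inline llvm::hash_code hash_value(const Key &K) {
      return llvm::hash_combine(K.Opcode,
                                llvm::hash_combine_range(K.Args.begin(),
                                                         K.Args.end()));
    }
    } // namespace example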
@@ -153,9 +158,24 @@ Expression ValueTable::create_expression(Instruction *I) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
+ if (I->isCommutative()) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
+ assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+ if (e.varargs[0] > e.varargs[1])
+ std::swap(e.varargs[0], e.varargs[1]);
+ }
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
- e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ CmpInst::Predicate Predicate = C->getPredicate();
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (C->getOpcode() << 8) | Predicate;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -165,6 +185,25 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
+Expression ValueTable::create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
+ "Not a comparison!");
+ Expression e;
+ e.type = CmpInst::makeCmpResultType(LHS->getType());
+ e.varargs.push_back(lookup_or_add(LHS));
+ e.varargs.push_back(lookup_or_add(RHS));
+
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (Opcode << 8) | Predicate;
+ return e;
+}
+
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
assert(EI != 0 && "Not an ExtractValueInst?");
Expression e;
@@ -414,6 +453,19 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
+/// lookup_or_add_cmp - Returns the value number of the given comparison,
+/// assigning it a new number if it did not have one before. Useful when
+/// we deduced the result of a comparison, but don't immediately have an
+/// instruction realizing that comparison to hand.
+uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ return e;
+}
+
/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
@@ -446,7 +498,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +583,7 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +622,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -776,7 +831,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const TargetData &TD) {
- // If the loaded or stored value is an first class array or struct, don't try
+ // If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
@@ -973,7 +1028,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
-/// GetStoreValueForLoad - This function is called when we have a
+/// GetLoadValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
@@ -1274,14 +1329,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
- if (Deps.size() > 100)
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1
- && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
- {
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1294,10 +1349,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
- SmallVector<BasicBlock*, 16> UnavailableBlocks;
+ SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
+ SmallVector<BasicBlock*, 64> UnavailableBlocks;
- for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1896,12 +1951,19 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
- Instruction *User = cast<Instruction>(*UI);
- unsigned OpNum = UI.getOperandNo();
- ++UI;
+ Use &U = (UI++).getUse();
+
+ // If From occurs as a phi node operand then the use implicitly lives in the
+ // corresponding incoming block. Otherwise it is the block containing the
+ // user that must be dominated by Root.
+ BasicBlock *UsingBlock;
+ if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
+ UsingBlock = PN->getIncomingBlock(U);
+ else
+ UsingBlock = cast<Instruction>(U.getUser())->getParent();
- if (DT->dominates(Root, User->getParent())) {
- User->setOperand(OpNum, To);
+ if (DT->dominates(Root, UsingBlock)) {
+ U.set(To);
++Count;
}
}
@@ -1912,69 +1974,119 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
- if (LHS == RHS) return false;
- assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+ SmallVector<std::pair<Value*, Value*>, 4> Worklist;
+ Worklist.push_back(std::make_pair(LHS, RHS));
+ bool Changed = false;
- // Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS))
- return false;
+ while (!Worklist.empty()) {
+ std::pair<Value*, Value*> Item = Worklist.pop_back_val();
+ LHS = Item.first; RHS = Item.second;
+
+ if (LHS == RHS) continue;
+ assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+
+ // Prefer a constant on the right-hand side, or an Argument if no constants.
+ if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
+ std::swap(LHS, RHS);
+ assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
+
+ // If there is no obvious reason to prefer the left-hand side over the right-
+ // hand side, ensure the longest lived term is on the right-hand side, so the
+ // shortest lived term will be replaced by the longest lived. This tends to
+ // expose more simplifications.
+ uint32_t LVN = VN.lookup_or_add(LHS);
+ if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
+ (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
+ // Move the 'oldest' value to the right-hand side, using the value number as
+ // a proxy for age.
+ uint32_t RVN = VN.lookup_or_add(RHS);
+ if (LVN < RVN) {
+ std::swap(LHS, RHS);
+ LVN = RVN;
+ }
+ }
+ assert((!isa<Instruction>(RHS) ||
+ DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
+ "Instruction doesn't dominate scope!");
+
+ // If value numbering later deduces that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'.
+ addToLeaderTable(LVN, RHS, Root);
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
+ // LHS always has at least one use that is not dominated by Root, this will
+ // never do anything if LHS has only one use.
+ if (!LHS->hasOneUse()) {
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
- // Make sure that any constants are on the right-hand side. In general the
- // best results are obtained by placing the longest lived value on the RHS.
- if (isa<Constant>(LHS))
- std::swap(LHS, RHS);
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ continue;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Worklist.push_back(std::make_pair(A, RHS));
+ Worklist.push_back(std::make_pair(B, RHS));
+ continue;
+ }
- // If neither term is constant then bail out. This is not for correctness,
- // it's just that the non-constant case is much less useful: it occurs just
- // as often as the constant case but handling it hardly ever results in an
- // improvement.
- if (!isa<Constant>(RHS))
- return false;
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B. When propagating a comparison such as
+ // "(A >= B)" == "true", replace all instances of "A < B" with "false".
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
- // If value numbering later deduces that an instruction in the scope is equal
- // to 'LHS' then ensure it will be turned into 'RHS'.
- addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root);
-
- // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.
- unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
- bool Changed = NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
-
- // Now try to deduce additional equalities from this one. For example, if the
- // known equality was "(A != B)" == "false" then it follows that A and B are
- // equal in the scope. Only boolean equalities with an explicit true or false
- // RHS are currently supported.
- if (!RHS->getType()->isIntegerTy(1))
- // Not a boolean equality - bail out.
- return Changed;
- ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
- if (!CI)
- // RHS neither 'true' nor 'false' - bail out.
- return Changed;
- // Whether RHS equals 'true'. Otherwise it equals 'false'.
- bool isKnownTrue = CI->isAllOnesValue();
- bool isKnownFalse = !isKnownTrue;
-
- // If "A && B" is known true then both A and B are known true. If "A || B"
- // is known false then both A and B are known false.
- Value *A, *B;
- if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
- (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
- Changed |= propagateEquality(A, RHS, Root);
- Changed |= propagateEquality(B, RHS, Root);
- return Changed;
- }
+ // If "A == B" is known true, or "A != B" is known false, then replace
+ // A with B everywhere in the scope.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ Worklist.push_back(std::make_pair(Op0, Op1));
+
+ // If "A >= B" is known true, replace "A < B" with false everywhere.
+ CmpInst::Predicate NotPred = Cmp->getInversePredicate();
+ Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
+ // Since we don't have the instruction "A < B" immediately to hand, work out
+ // the value number that it would have and use that to find an appropriate
+ // instruction (if any).
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
+ // If the number we were assigned was brand new then there is no point in
+ // looking for an instruction realizing it: there cannot be one!
+ if (Num < NextNum) {
+ Value *NotCmp = findLeader(Root, Num);
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements =
+ replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
+ }
+ // Ensure that any instruction in scope that gets the "A < B" value number
+ // is replaced with false.
+ addToLeaderTable(Num, NotVal, Root);
- // If we are propagating an equality like "(A == B)" == "true" then also
- // propagate the equality A == B.
- if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
- // Only equality comparisons are supported.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) {
- Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
- Changed |= propagateEquality(Op0, Op1, Root);
+ continue;
}
- return Changed;
}
return Changed;
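In source terms, the comparison handling added to propagateEquality lets a dominated re-test of the inverse relation be folded away. A hedged illustration in plain C++ (not IR, and not the pass's own code):

    void use(int);
    void never();

    void example(int x, int y) {
      if (x >= y) {
        use(x);
        // In the region dominated by the true edge, "x >= y" == true, so the
        // inverse comparison "x < y" gets the leader 'false' and this branch
        // can be deleted.
        if (x < y)
          never();
      }
    }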
@@ -1985,35 +2097,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
DominatorTree *DT) {
- // First off, there must not be more than one edge from Src to Dst, there
- // should be exactly one. So keep track of the number of times Src occurs
- // as a predecessor of Dst and fail if it's more than once. Secondly, any
- // other predecessors of Dst should be dominated by Dst (see logic below).
- bool SawEdgeFromSrc = false;
- for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- if (Pred == Src) {
- // An edge from Src to Dst.
- if (SawEdgeFromSrc)
- // There are multiple edges from Src to Dst - fail.
- return false;
- SawEdgeFromSrc = true;
- continue;
- }
- // If the predecessor is not dominated by Dst, then it must be possible to
- // reach it either without passing through Src (and thus not via the edge)
- // or by passing through Src but taking a different edge out of Src. Either
- // way it is possible to reach Dst without passing via the edge, so fail.
- if (!DT->dominates(Dst, *PI))
- return false;
- }
- assert(SawEdgeFromSrc && "No edge between these basic blocks!");
-
- // Every path from the entry block to Dst must at some point pass to Dst from
- // a predecessor that is not dominated by Dst. This predecessor can only be
- // Src, since all others are dominated by Dst. As there is only one edge from
- // Src to Dst, the path passes by this edge.
- return true;
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ BasicBlock *Pred = Dst->getSinglePredecessor();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
}
/// processInstruction - When calculating availability, handle an instruction
@@ -2027,7 +2119,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2076,16 +2168,17 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
- BasicBlock *Dst = SI->getSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *Dst = i.getCaseSuccessor();
if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
}
return Changed;
}
// Instructions with void type don't return a value, so there's
- // no point in trying to find redudancies in them.
+ // no point in trying to find redundancies in them.
if (I->getType()->isVoidTy()) return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
@@ -2101,7 +2194,7 @@ bool GVN::processInstruction(Instruction *I) {
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- if (Num == NextNum) {
+ if (Num >= NextNum) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
@@ -2129,6 +2222,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2241,7 +2335,14 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
-
+
+ // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
+ // sinking the compare again, and it would force the code generator to
+ // move the i1 from processor flags or predicate registers into a general
+ // purpose register.
+ if (isa<CmpInst>(CurInst))
+ continue;
+
// We don't currently value number ANY inline asm calls.
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
if (CallI->isInlineAsm())
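Taken together, the create_expression and create_cmp_expression changes canonicalize commutative operands and swapped compare predicates so that, for example, "x < y" and "y > x" receive the same value number. A self-contained sketch of that canonicalization rule, using a toy Pred enum rather than CmpInst::Predicate:

    #include <tuple>
    #include <utility>

    enum Pred { LT, GT, LE, GE, EQ, NE };

    // The relation that holds after exchanging the two operands (a toy
    // counterpart of CmpInst::getSwappedPredicate).
    static Pred swapPred(Pred P) {
      switch (P) {
      case LT: return GT;
      case GT: return LT;
      case LE: return GE;
      case GE: return LE;
      default: return P; // EQ and NE are symmetric
      }
    }

    // Canonical key for a comparison over two operand value numbers: order the
    // operands and adjust the predicate so equivalent comparisons collide.
    static std::tuple<int, unsigned, unsigned>
    cmpKey(Pred P, unsigned L, unsigned R) {
      if (L > R) {
        std::swap(L, R);
        P = swapPred(P);
      }
      return std::make_tuple(static_cast<int>(P), L, R);
    }

    // cmpKey(LT, 7, 3) == cmpKey(GT, 3, 7): "x < y" and "y > x" share a key,
    // and would therefore share a value number.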
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 5f863ea241ca..c2bd6e69ee10 100644
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -1,4 +1,4 @@
-//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -51,9 +51,8 @@
// note that we saved 2 registers here almost "for free".
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-global-merge"
-#include "ARM.h"
-#include "llvm/CodeGen/Passes.h"
+#define DEBUG_TYPE "global-merge"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Attributes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -66,10 +65,12 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumMerged , "Number of globals merged");
namespace {
- class ARMGlobalMerge : public FunctionPass {
+ class GlobalMerge : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
const TargetLowering *TLI;
@@ -79,8 +80,10 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit ARMGlobalMerge(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
+ explicit GlobalMerge(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
@@ -109,9 +112,12 @@ namespace {
};
} // end anonymous namespace
-char ARMGlobalMerge::ID = 0;
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS(GlobalMerge, "global-merge",
+ "Global Merge", false, false)
+
-bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst) const {
const TargetData *TD = TLI->getTargetData();
@@ -153,6 +159,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
+ NumMerged++;
}
i = j;
}
@@ -161,7 +168,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
}
-bool ARMGlobalMerge::doInitialization(Module &M) {
+bool GlobalMerge::doInitialization(Module &M) {
SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
const TargetData *TD = TLI->getTargetData();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
@@ -175,7 +182,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
+ unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
@@ -186,8 +193,8 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
continue;
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
- const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
- if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
+ .isBSSLocal())
BSSGlobals.push_back(I);
else if (I->isConstant())
ConstGlobals.push_back(I);
@@ -210,10 +217,10 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
return Changed;
}
-bool ARMGlobalMerge::runOnFunction(Function &F) {
+bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
-FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
- return new ARMGlobalMerge(tli);
+Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
+ return new GlobalMerge(tli);
}
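As a rough source-level analogy of the effect of doMerge (an illustration, not the pass's actual output): several small globals become one aggregate, so every former reference turns into a constant offset from a single merged symbol and only one base address has to be materialized:

    static int a, b, c;                    // before: three symbols
    int sumBefore() { return a + b + c; }  // three addresses to form

    static struct { int a, b, c; } merged; // after (conceptually): one symbol
    int sumAfter() { return merged.a + merged.b + merged.c; } // base + offsets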
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 75fa011a14b7..a9ba6579db5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -33,7 +33,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -50,30 +49,21 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
- IVUsers *IU;
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
@@ -84,7 +74,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -97,13 +87,9 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (EnableIVRewrite)
- AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (EnableIVRewrite)
- AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -121,8 +107,6 @@ namespace {
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar, SCEVExpander &Rewriter);
@@ -138,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
@@ -180,6 +163,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -445,11 +433,6 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- if (IU)
- IU->AddUsersIfInteresting(NewPHI);
-
Changed = true;
}
@@ -595,124 +578,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
//===----------------------------------------------------------------------===//
-// Rewrite IV users based on a canonical IV.
-// Only for use with -enable-iv-rewrite.
-//===----------------------------------------------------------------------===//
-
-/// FIXME: It is an extremely bad idea to indvar substitute anything more
-/// complex than affine induction variables. Doing so will put expensive
-/// polynomial evaluations inside of the loop, and the str reduction pass
-/// currently can only reduce affine polynomials. For now just disable
-/// indvar subst on anything more complex than an affine addrec, unless
-/// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative
- = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
- NewValI->setDebugLoc(User->getDebugLoc());
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//
@@ -843,7 +708,7 @@ protected:
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
- Instruction *WidenIVUse(NarrowIVDefUse DU);
+ Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@@ -917,7 +782,6 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
}
return WideBO;
}
- llvm_unreachable(0);
}
/// No-wrap operations can transfer sign extension of their result to their
@@ -946,9 +810,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+ // When creating this AddExpr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
@@ -983,7 +851,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (isa<PHINode>(DU.NarrowUse) &&
@@ -1051,7 +919,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// NarrowUse.
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr
- && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
+ && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
@@ -1156,7 +1024,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *WideUse = WidenIVUse(DU);
+ Instruction *WideUse = WidenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
@@ -1231,7 +1099,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1250,16 +1122,13 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
}
}
- if (EnableIVRewrite)
- return false;
-
// Recurse past add expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive to rematerialize.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
@@ -1270,14 +1139,24 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
return true;
- // If we haven't recognized an expensive SCEV patter, assume its an expression
- // produced by program code.
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
return false;
}
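The Processed set threaded through isHighCostExpansion above is the standard visited-set guard for walking a DAG. A generic sketch of the pattern with a hypothetical Expr type (std::unordered_set standing in for SmallPtrSet):

    #include <unordered_set>
    #include <vector>

    struct Expr {
      std::vector<const Expr *> Ops;
      bool Expensive;
    };

    // Shared sub-expressions are examined only once; without the Seen set a
    // heavily shared node could be revisited an exponential number of times.
    static bool isHighCost(const Expr *E,
                           std::unordered_set<const Expr *> &Seen) {
      if (!Seen.insert(E).second)
        return false; // already examined along another path
      if (E->Expensive)
        return true;
      for (const Expr *Op : E->Ops)
        if (isHighCost(Op, Seen))
          return true;
      return false;
    }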
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
+///
+/// TODO: This fails for pointer-type loop counters with greater than one byte
+/// strides, consequently preventing LFTR from running. For the purpose of LFTR
+/// we could skip this check in the case that the LFTR loop counter (chosen by
+/// FindLoopCounter) is also pointer type. Instead, we could directly convert
+/// the loop test to an inequality test by checking the target data's alignment
+/// of element types (given that the initial pointer value originates from or is
+/// used by an ABI constrained operation, as opposed to inttoptr/ptrtoint).
+/// However, we don't yet have a strong motivation for converting loop tests
+/// into inequality tests.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
@@ -1292,42 +1171,13 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
-///
-/// TODO: Unnecessary when ForceLFTR is removed.
-static Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
-
- Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
-
- return Trunc->getSrcTy();
- }
- return Ty;
-}
-
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
@@ -1429,6 +1279,10 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// FindLoopCounter - Find an affine IV in canonical form.
///
+/// BECount may be an i8* pointer type. The pointer difference is already
+/// valid count without scaling the address stride, so it remains a pointer
+/// expression as far as SCEV is concerned.
+///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
@@ -1437,11 +1291,6 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
- // I'm not sure how BECount could be a pointer type, but we definitely don't
- // want to LFTR that.
- if (BECount->getType()->isPointerTy())
- return 0;
-
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1458,6 +1307,10 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
if (!SE->isSCEVable(Phi->getType()))
continue;
+ // Avoid comparing an integer IV against a pointer Limit.
+ if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
+ continue;
+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
if (!AR || AR->getLoop() != L || !AR->isAffine())
continue;
@@ -1503,6 +1356,82 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
+/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
+/// holds the RHS of the new loop test.
+static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+ SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ const SCEV *IVInit = AR->getStart();
+
+ // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
+ // finds a valid pointer IV. Sign extend BECount in order to materialize a
+ // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
+ // the existing GEPs whenever possible.
+ if (IndVar->getType()->isPointerTy()
+ && !IVCount->getType()->isPointerTy()) {
+
+ Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
+ const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(IVOffset, L) &&
+ "Computed iteration count is not loop invariant!");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
+
+ Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
+ // We could handle pointer IVs other than i8*, but we need to compensate for
+ // gep index scaling. See canExpandBackedgeTakenCount comments.
+ assert(SE->getSizeOfExpr(
+ cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+ && "unit stride pointer IV must be i8*");
+
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ }
+ else {
+ // In any other case, convert both IVInit and IVCount to integers before
+ // comparing. This may result in SCEV expansion of pointers, but in practice
+ // SCEV will fold the pointer arithmetic away as such:
+ // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
+ //
+ // Valid Cases: (1) both integers (most common); (2) both may be pointers
+ // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
+ // pointer may occur when enable-iv-rewrite generates a canonical IV on top
+ // of case #2.
+
+ const SCEV *IVLimit = 0;
+ // For unit stride, IVCount = Start + BECount with 2's complement overflow.
+ // For non-zero Start, compute IVCount here.
+ if (AR->getStart()->isZero())
+ IVLimit = IVCount;
+ else {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For integer IVs, truncate the IV before computing IVInit + BECount.
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(IVCount->getType()))
+ IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+
+ IVLimit = SE->getAddExpr(IVInit, IVCount);
+ }
+ // Expand the code for the iteration count.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ IRBuilder<> Builder(BI);
+ assert(SE->isLoopInvariant(IVLimit, L) &&
+ "Computed iteration count is not loop invariant!");
+ // Ensure that we generate the same type as IndVar, or a smaller integer
+ // type. In the presence of null pointer values, we have an integer type
+ // SCEV expression (IVInit) for a pointer type IV value (IndVar).
+ Type *LimitTy = IVCount->getType()->isPointerTy() ?
+ IndVar->getType() : IVCount->getType();
+ return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
+ }
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
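In source terms, the end state that genLoopLimit and LinearFunctionTestReplace aim for looks roughly like the following (an illustration of the shape of the rewritten exit test, not generated code):

    // Original loops exit on whatever predicate the source used, e.g. "i < n".
    // After LFTR the loop exits on a canonical inequality against a limit that
    // is computed once, outside the loop: limit = IVInit + IVCount.
    void lftrShape(int *p, int n) {
      if (n <= 0)
        return;                          // assume a known, non-negative trip count
      int limit = 0 + n;                 // materialized in the preheader
      for (int i = 0; i != limit; ++i)   // canonical '!=' exit test
        p[i] = 0;
    }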
@@ -1514,37 +1443,35 @@ LinearFunctionTestReplace(Loop *L,
PHINode *IndVar,
SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
// LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
- Type *CntTy = !EnableIVRewrite ?
- BackedgeTakenCount->getType() : IndVar->getType();
+ Type *CntTy = BackedgeTakenCount->getType();
- const SCEV *IVLimit = BackedgeTakenCount;
+ const SCEV *IVCount = BackedgeTakenCount;
- // If the exiting block is not the same as the backedge block, we must compare
- // against the preincremented value, otherwise we prefer to compare against
- // the post-incremented value.
+ // If the exiting block is the same as the backedge block, we prefer to
+ // compare against the post-incremented value, otherwise we must compare
+ // against the preincremented value.
Value *CmpIndVar;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
const SCEV *N =
- SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
- if (CntTy == IVLimit->getType())
- IVLimit = N;
+ SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
+ if (CntTy == IVCount->getType())
+ IVCount = N;
else {
- const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+ const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
// No overflow. Cast the sum.
- IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+ IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
} else {
// Potential overflow. Cast before doing the add.
- IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
- IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
+ IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
}
}
// The BackedgeTaken expression contains the number of times that the
@@ -1552,62 +1479,17 @@ LinearFunctionTestReplace(Loop *L,
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
- // We have to use the preincremented value...
- IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+ // We must use the preincremented value...
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
CmpIndVar = IndVar;
}
- // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
- // So for, non-zero start compute the IVLimit here.
- bool isPtrIV = false;
- Type *CmpTy = CntTy;
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
- if (!AR->getStart()->isZero()) {
- assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
- const SCEV *IVInit = AR->getStart();
-
- // For pointer types, sign extend BECount in order to materialize a GEP.
- // Note that for without EnableIVRewrite, we never run SCEVExpander on a
- // pointer type, because we must preserve the existing GEPs. Instead we
- // directly generate a GEP later.
- if (IVInit->getType()->isPointerTy()) {
- isPtrIV = true;
- CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
- IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
- }
- // For integer types, truncate the IV before computing IVInit + BECount.
- else {
- if (SE->getTypeSizeInBits(IVInit->getType())
- > SE->getTypeSizeInBits(CmpTy))
- IVInit = SE->getTruncateExpr(IVInit, CmpTy);
-
- IVLimit = SE->getAddExpr(IVInit, IVLimit);
- }
- }
- // Expand the code for the iteration count.
- IRBuilder<> Builder(BI);
-
- assert(SE->isLoopInvariant(IVLimit, L) &&
- "Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
-
- // Create a gep for IVInit + IVLimit from on an existing pointer base.
- assert(isPtrIV == IndVar->getType()->isPointerTy() &&
- "IndVar type must match IVInit type");
- if (isPtrIV) {
- Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
- assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
- assert(SE->getSizeOfExpr(
- cast<PointerType>(IVStart->getType())->getElementType())->isOne()
- && "unit stride pointer IV must be i8*");
-
- Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
- ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
- Builder.SetInsertPoint(BI);
- }
+ Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
+ && "genLoopLimit missed a cast");
// Insert a new icmp_ne or icmp_eq instruction before the branch.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
@@ -1619,11 +1501,13 @@ LinearFunctionTestReplace(Loop *L,
<< " op:\t"
<< (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
<< " RHS:\t" << *ExitCnt << "\n"
- << " Expr:\t" << *IVLimit << "\n");
+ << " IVCount:\t" << *IVCount << "\n");
+ IRBuilder<> Builder(BI);
if (SE->getTypeSizeInBits(CmpIndVar->getType())
- > SE->getTypeSizeInBits(CmpTy)) {
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+ > SE->getTypeSizeInBits(ExitCnt->getType())) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
}
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
@@ -1680,11 +1564,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<LandingPadInst>(I))
continue;
- // Don't sink static AllocaInsts out of the entry block, which would
- // turn them into dynamic allocas!
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (AI->isStaticAlloca())
- continue;
+ // Don't sink alloca: we never want to sink static allocas out of the
+ // entry block, and correctly sinking dynamic allocas requires
+ // checks for stacksave/stackrestore intrinsics.
+ // FIXME: Refactor this check somehow?
+ if (isa<AllocaInst>(I))
+ continue;
// Determine if there is a use in or before the loop (direct or
// otherwise).
@@ -1746,8 +1631,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (EnableIVRewrite)
- IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1774,10 +1657,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (!EnableIVRewrite) {
- Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
- }
+ Rewriter.disableCanonicalMode();
+ SimplifyAndExtend(L, Rewriter, LPM);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1788,106 +1669,28 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
- // Eliminate redundant IV users.
- if (EnableIVRewrite)
- Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
-
// Eliminate redundant IV cycles.
- if (!EnableIVRewrite)
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
-
- // Compute the type of the largest recurrence expression, and decide whether
- // a canonical induction variable should be inserted.
- Type *LargestType = 0;
- bool NeedCannIV = false;
- bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (EnableIVRewrite && ExpandBECount) {
- // If we have a known trip count and a single exit block, we'll be
- // rewriting the loop exit test condition below, which requires a
- // canonical induction variable.
- NeedCannIV = true;
- Type *Ty = BackedgeTakenCount->getType();
- if (!EnableIVRewrite) {
- // In this mode, SimplifyIVUsers may have already widened the IV used by
- // the backedge test and inserted a Trunc on the compare's operand. Get
- // the wider type to avoid creating a redundant narrow IV only used by the
- // loop test.
- LargestType = getBackedgeIVType(L);
- }
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = SE->getEffectiveSCEVType(Ty);
- }
- if (EnableIVRewrite) {
- for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- NeedCannIV = true;
- Type *Ty =
- SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = Ty;
- }
- }
-
- // Now that we know the largest of the induction variable expressions
- // in this loop, insert a canonical induction variable of the largest size.
- PHINode *IndVar = 0;
- if (NeedCannIV) {
- // Check to see if the loop already has any canonical-looking induction
- // variables. If any are present and wider than the planned canonical
- // induction variable, temporarily remove them, so that the Rewriter
- // doesn't attempt to reuse them.
- SmallVector<PHINode *, 2> OldCannIVs;
- while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
- if (SE->getTypeSizeInBits(OldCannIV->getType()) >
- SE->getTypeSizeInBits(LargestType))
- OldCannIV->removeFromParent();
- else
- break;
- OldCannIVs.push_back(OldCannIV);
- }
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
- IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
-
- ++NumInserted;
- Changed = true;
- DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
-
- // Now that the official induction variable is established, reinsert
- // any old canonical-looking variables after it so that the IR remains
- // consistent. They will be deleted as part of the dead-PHI deletion at
- // the end of the pass.
- while (!OldCannIVs.empty()) {
- PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
- }
- }
- else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
- IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
- }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- Value *NewICmp = 0;
- if (ExpandBECount && IndVar) {
- // Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
- // pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops, while
- // LoopPassManager only forces the current loop to be simplified.
- //
- // FIXME: SCEV expansion has no way to bail out, so the caller must
- // explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
- if (!AR || AR->getLoop()->getLoopPreheader())
- NewICmp =
- LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
+ if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ if (IndVar) {
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
+ }
}
- // Rewrite IV-derived expressions.
- if (EnableIVRewrite)
- RewriteIVExpressions(L, Rewriter);
-
// Clear the rewriter cache, because values that are in the rewriter's cache
// can be deleted in the loop below, causing the AssertingVH in the cache to
// trigger.
@@ -1906,13 +1709,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop may be sunk below the loop to reduce register pressure.
SinkUnusedInvariants(L);
- // For completeness, inform IVUsers of the IV use in the newly-created
- // loop exit test instruction.
- if (IU && NewICmp) {
- ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
- if (NewICmpInst)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
- }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
@@ -1922,8 +1718,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
#ifndef NDEBUG
- if (!EnableIVRewrite && VerifyIndvars &&
- !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
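The effect of the rewritten LinearFunctionTestReplace is easier to see on a concrete loop. The C++ below is only an illustration of the shape of the transform (the pass works on LLVM IR, and tripCount/iv are made-up names): the loop-variant exit test is replaced by an equality test of a counter against a backedge-taken count that is expanded once, outside the loop.

    // Before: the exit test recomputes a signed comparison against n each trip.
    void before(int *a, int n) {
      for (int i = 0; i < n; i += 4)
        a[i] = 0;
    }

    // After (conceptually): the trip count is materialized in the preheader and
    // the exit branch becomes the icmp_eq/icmp_ne inserted above. Assumes n + 3
    // does not overflow.
    void after(int *a, int n) {
      int tripCount = n > 0 ? (n + 3) / 4 : 0;  // loop-invariant, expanded once
      int i = 0;
      for (int iv = 0; iv != tripCount; ++iv, i += 4)
        a[i] = 0;
    }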
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af32759c..429b61b6e501 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -852,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (BBIt != LoadBB->begin())
return false;
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -870,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
+
+ // If tbaa tags disagree or are not present, forget about them.
+ if (TBAATag != ThisTBAATag) TBAATag = 0;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -921,8 +934,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -935,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LI->getAlignment(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -1082,9 +1097,9 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
- else {
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ } else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
DestBB = cast<BlockAddress>(Val)->getBasicBlock();
@@ -1334,8 +1349,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1493,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
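The TBAA handling added to SimplifyPartiallyRedundantLoad above amounts to a simple metadata meet. A minimal sketch, assuming the 2012-era include path; mergeTBAATag is a hypothetical helper, while MDNode, getMetadata and LLVMContext::MD_tbaa are the interfaces the hunk already uses:

    #include "llvm/Metadata.h"
    using namespace llvm;

    // Keep a TBAA tag only while every contributing load agrees on it; any
    // mismatch, or a load with no tag, collapses the result to null, and the
    // newly inserted load then simply carries no TBAA information.
    static MDNode *mergeTBAATag(MDNode *Accum, MDNode *Next) {
      return Accum == Next ? Accum : 0;
    }

In the loop over predecessors this plays the same role as the explicit "if (TBAATag != ThisTBAATag) TBAATag = 0;" check.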
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index b79bb1300fec..8795cd853fae 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,6 +380,8 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
+ if (LI->getMetadata("invariant.load"))
+ return true;
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
@@ -579,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
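Gathering the two load-hoisting conditions touched above into one place, a minimal sketch under the same 2012-era headers; loadIsTriviallyHoistable is a hypothetical helper, not code from this patch:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // A load can be hoisted without scanning the loop's stores if it reads
    // provably constant memory, or if it carries !invariant.load, which asserts
    // that the loaded location is never modified. Everything else falls back to
    // the alias-set scan performed by canSinkOrHoistInst.
    static bool loadIsTriviallyHoistable(AliasAnalysis &AA, LoadInst *LI) {
      if (AA.pointsToConstantMemory(LI->getOperand(0)))
        return true;
      return LI->getMetadata("invariant.load") != 0;
    }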
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
new file mode 100644
index 000000000000..cee911976804
--- /dev/null
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/Scalar/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Scalar
+parent = Transforms
+library_name = ScalarOpts
+required_libraries = Analysis Core InstCombine Support Target TransformUtils
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c1a661..f0f05e6f500a 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 9fd0958fd4c3..59aace9e36dd 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -52,13 +53,14 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void simplifyLoopLatch(Loop *L);
bool rotateLoop(Loop *L);
-
+
private:
LoopInfo *LI;
};
}
-
+
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -73,6 +75,11 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ simplifyLoopLatch(L);
+
// One loop can be rotated multiple times.
bool MadeChange = false;
while (rotateLoop(L))
@@ -92,18 +99,18 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock::iterator I, E = OrigHeader->end();
for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
-
+
// Now fix up users of the instructions in OrigHeader, inserting PHI nodes
// as necessary.
SSAUpdater SSA;
for (I = OrigHeader->begin(); I != E; ++I) {
Value *OrigHeaderVal = I;
-
+
// If there are no uses of the value (e.g. because it returns void), there
// is nothing to rewrite.
if (OrigHeaderVal->use_empty())
continue;
-
+
Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
// The value now exits in two versions: the initial value in the preheader
@@ -111,27 +118,27 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
-
+
// Visit each use of the OrigHeader instruction.
for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
UE = OrigHeaderVal->use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
Use &U = UI.getUse();
-
+
// Increment the iterator before removing the use from the list.
++UI;
-
+
// SSAUpdater can't handle a non-PHI use in the same block as an
// earlier def. We can easily handle those cases manually.
Instruction *UserInst = cast<Instruction>(U.getUser());
if (!isa<PHINode>(UserInst)) {
BasicBlock *UserBB = UserInst->getParent();
-
+
// The original users in the OrigHeader are already using the
// original definitions.
if (UserBB == OrigHeader)
continue;
-
+
// Users in the OrigPreHeader need to use the value to which the
// original definitions are mapped.
if (UserBB == OrigPreheader) {
@@ -139,32 +146,128 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
continue;
}
}
-
+
// Anything else can be handled by SSAUpdater.
SSA.RewriteUse(U);
}
}
-}
+}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ bool seenIncrement = false;
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+void LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
+ return;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+}
/// Rotate loop LP. Return true if the loop is rotated.
bool LoopRotate::rotateLoop(Loop *L) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
-
+
BasicBlock *OrigHeader = L->getHeader();
-
+
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
return false;
-
+
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
+ // Updating PHInodes in loops with multiple exits adds complexity.
// Keep it simple, and restrict loop rotation to loops with one exit only.
// In future, lift this restriction and support for multiple exits if
// required.
@@ -184,7 +287,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
BasicBlock *OrigLatch = L->getLoopLatch();
-
+
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (OrigPreheader == 0 || OrigLatch == 0)
@@ -203,9 +306,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (L->contains(Exit))
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
- assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
"Unable to determine loop header and exit blocks");
-
+
// This code assumes that the new header has exactly one predecessor.
// Remove any single-entry PHI nodes in it.
assert(NewHeader->getSinglePredecessor() &&
@@ -227,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
Instruction *Inst = I++;
-
+
// If the instruction's operands are invariant and it doesn't read or write
// memory, then it is safe to hoist. Doing this doesn't change the order of
// execution in the preheader, but does prevent the instruction from
@@ -236,18 +339,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) &&
!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
+ !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
}
-
+
// Otherwise, create a duplicate of the instruction.
Instruction *C = Inst->clone();
-
+
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
-
+
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
@@ -287,7 +391,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
L->moveToHeader(NewHeader);
assert(L->getHeader() == NewHeader && "Latch block is our new header");
-
+
// At this point, we've finished our major CFG changes. As part of cloning
// the loop into the preheader we've simplified instructions and the
// duplicated conditional branch may now be branching on a constant. If it is
@@ -308,16 +412,16 @@ bool LoopRotate::rotateLoop(Loop *L) {
// the dominator of Exit.
DT->changeImmediateDominator(Exit, OrigPreheader);
DT->changeImmediateDominator(NewHeader, OrigPreheader);
-
+
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
-
+
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
// thus is not a preheader anymore. Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
NewPH->setName(NewHeader->getName() + ".lr.ph");
-
+
// Preserve canonical loop form, which means that 'Exit' should have only one
// predecessor.
BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
@@ -329,7 +433,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
NewBI->setDebugLoc(PHBI->getDebugLoc());
PHBI->eraseFromParent();
-
+
// With our CFG finalized, update DomTree if it is available.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
// Update OrigHeader to be dominated by the new header block.
@@ -337,7 +441,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
}
-
+
assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
@@ -346,7 +450,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
-
+
++NumRotated;
return true;
}
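The latch folding introduced by simplifyLoopLatch above is easiest to picture on a small search loop. The C++ below is only an analogue of the two CFG shapes involved (both functions are invented, and the loop assumes key occurs in a): the lone post-increment in the latch is speculated ahead of the exit test, after which the exiting block itself becomes the latch and rotation is no longer needed.

    // Before: the header/exiting block tests the element; the latch only
    // increments and branches back.
    int find_before(const int *a, int key) {
      int i = 0;
      for (;;) {
        if (a[i] == key)   // exiting block
          return i;
        i = i + 1;         // latch: one cheap increment, then back to the header
      }
    }

    // After: the increment runs speculatively, even on the final iteration,
    // which is why shouldSpeculateInstrs only admits cheap, side-effect-free
    // instructions. The loop body is now a single block.
    int find_after(const int *a, int key) {
      int i = 0;
      for (;;) {
        int next = i + 1;  // hoisted from the old latch
        if (a[i] == key)
          return i;
        i = next;
      }
    }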
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3e122c2a866e..d57ec22f44ab 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,22 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
- "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
-
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+// This is now needed for ivchains.
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::init(true),
+ cl::desc("Enable LSR phi elimination"));
+
+#ifndef NDEBUG
+// Stress test IV chain generation.
+static cl::opt<bool> StressIVChain(
+ "stress-ivchain", cl::Hidden, cl::init(false),
+ cl::desc("Stress test LSR IV chains"));
+#else
+static bool StressIVChain = false;
+#endif
namespace {
@@ -636,6 +639,91 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
+/// Check if expanding this expression is likely to incur significant cost. This
+/// is tricky because SCEV doesn't track which expressions are actually computed
+/// by the current IR.
+///
+/// We currently allow expansion of IV increments that involve adds,
+/// multiplication by constants, and AddRecs from existing phis.
+///
+/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
+/// obvious multiple of the UDivExpr.
+static bool isHighCostExpansion(const SCEV *S,
+ SmallPtrSet<const SCEV*, 8> &Processed,
+ ScalarEvolution &SE) {
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
+ Processed, SE);
+ case scZeroExtend:
+ return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ case scSignExtend:
+ return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ }
+
+ if (!Processed.insert(S))
+ return false;
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, Processed, SE))
+ return true;
+ }
+ return false;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ if (Mul->getNumOperands() == 2) {
+ // Multiplication by a constant is ok
+ if (isa<SCEVConstant>(Mul->getOperand(0)))
+ return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
+
+ // If we have the value of one operand, check if an existing
+ // multiplication already generates this expression.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
+ Value *UVal = U->getValue();
+ for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
+ UI != UE; ++UI) {
+ // If U is a constant, it may be used by a ConstantExpr.
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (User && User->getOpcode() == Instruction::Mul
+ && SE.isSCEVable(User->getType())) {
+ return SE.getSCEV(User) == Mul;
+ }
+ }
+ }
+ }
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (isExistingPhi(AR, SE))
+ return false;
+ }
+
+  // For now, consider any other type of expression (div/mul/min/max) high cost.
+ return true;
+}
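isHighCostExpansion is used later in this patch to veto IV-chain increments whose expansion would need new, non-trivial instructions (for example a udiv that no existing instruction computes), while adds, multiplies by constants, and AddRecs that already have a phi are treated as cheap. A sketch of the intended call pattern, assuming IncExpr and SE are in scope as they are at the later call site:

    SmallPtrSet<const SCEV *, 8> Processed; // caller-owned: shared subtrees are visited once
    if (isHighCostExpansion(IncExpr, Processed, SE))
      return 0;  // give up rather than materialize an expensive increment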
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +793,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +807,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -729,41 +819,20 @@ void Cost::RateRegister(const SCEV *Reg,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- if (AR->getLoop() == L)
- AddRecCost += 1; /// TODO: This should be a function of the stride.
-
// If this is an addrec for another loop, don't second-guess its addrec phi
// nodes. LSR isn't currently smart enough to reason about more than one
- // loop at a time. LSR has either already run on inner loops, will not run
- // on other loops, and cannot be expected to change sibling loops. If the
- // AddRec exists, consider it's register free and leave it alone. Otherwise,
- // do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
- else if (!EnableNested || L->contains(AR->getLoop()) ||
- (!AR->getLoop()->contains(L) &&
- DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
- if (!EnableNested) {
- Loose();
+ // loop at a time. LSR has already run on inner loops, will not run on outer
+ // loops, and cannot be expected to change sibling loops.
+ if (AR->getLoop() != L) {
+      // If the AddRec exists, consider its register free and leave it alone.
+ if (isExistingPhi(AR, SE))
return;
- }
- // If this isn't one of the addrecs that the loop already has, it
- // would require a costly new phi and add. TODO: This isn't
- // precisely modeled right now.
- ++NumBaseAdds;
- if (!Regs.count(AR->getStart())) {
- RateRegister(AR->getStart(), Regs, L, SE, DT);
- if (isLoser())
- return;
- }
+
+ // Otherwise, do not consider this formula at all.
+ Loose();
+ return;
}
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
@@ -791,13 +860,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +883,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +902,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1184,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1194,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1205,10 +1282,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
- return false;
+ if (!TLI)
+ return false;
+ // We have one of:
+ // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
+ // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // Offs is the ICmp immediate.
+ int64_t Offs = AM.BaseOffs;
+ if (AM.Scale == 0)
+ Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+ return TLI->isLegalICmpImmediate(Offs);
}
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
return true;
case LSRUse::Basic:
@@ -1220,7 +1306,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
return AM.Scale == 0 || AM.Scale == -1;
}
- return false;
+ llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isLegalUse(TargetLowering::AddrMode AM,
@@ -1327,6 +1413,36 @@ struct UseMapDenseMapInfo {
}
};
+/// IVInc - An individual increment in a Chain of IV increments.
+/// Relate an IV user to an expression that computes the IV it uses from the IV
+/// used by the previous link in the Chain.
+///
+/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
+/// original IVOperand. The head of the chain's IVOperand is only valid during
+/// chain collection, before LSR replaces IV users. During chain generation,
+/// IncExpr can be used to find the new IVOperand that computes the same
+/// expression.
+struct IVInc {
+ Instruction *UserInst;
+ Value* IVOperand;
+ const SCEV *IncExpr;
+
+ IVInc(Instruction *U, Value *O, const SCEV *E):
+ UserInst(U), IVOperand(O), IncExpr(E) {}
+};
+
+// IVChain - The list of IV increments in program order.
+// We typically add the head of a chain without finding subsequent links.
+typedef SmallVector<IVInc,1> IVChain;
+
+/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
+/// Distinguish between FarUsers that definitely cross IV increments and
+/// NearUsers that may be used between IV increments.
+struct ChainUsers {
+ SmallPtrSet<Instruction*, 4> FarUsers;
+ SmallPtrSet<Instruction*, 4> NearUsers;
+};
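To motivate the chain machinery declared above, a C++ analogue of what a chain buys on a target with cheap pointer post-increments; sum_pairs is invented for illustration and assumes n is even.

    // Unchained, every access is recomputed from the base plus a scaled index,
    // which keeps both the base pointer and the index register live.
    // Chained, each address is a small fixed offset from the previous one, so
    // only one pointer needs to stay live across the body.
    int sum_pairs(const int *p, int n) {
      int sum = 0;
      for (const int *q = p, *e = p + n; q != e; q += 2) {
        sum += q[0];        // head of the chain
        sum += *(q + 1);    // IVInc: derived from q, not from p and an index
      }
      return sum;
    }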
+
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
@@ -1359,11 +1475,29 @@ class LSRInstance {
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
+ // Limit the number of chains to avoid quadratic behavior. We don't expect to
+ // have more than a few IV increment chains in a loop. Missing a Chain falls
+ // back to normal LSR behavior for those uses.
+ static const unsigned MaxChains = 8;
+
+ /// IVChainVec - IV users can form a chain of IV increments.
+ SmallVector<IVChain, MaxChains> IVChainVec;
+
+ /// IVIncSet - IV users that belong to profitable IVChains.
+ SmallPtrSet<Use*, MaxChains> IVIncSet;
+
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
+ void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec);
+ void FinalizeChain(IVChain &Chain);
+ void CollectChains();
+ void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts);
+
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
@@ -1389,7 +1523,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1428,9 +1561,11 @@ public:
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
- BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
- const LSRFixup &LF,
- const LSRUse &LU) const;
+ BasicBlock::iterator
+ AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
@@ -1450,6 +1585,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2181,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2091,11 +2228,544 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
DEBUG(print_factors_and_types(dbgs()));
}
+/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
+/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
+/// Instructions to IVStrideUses, we could partially skip this.
+static User::op_iterator
+findIVOperand(User::op_iterator OI, User::op_iterator OE,
+ Loop *L, ScalarEvolution &SE) {
+ for(; OI != OE; ++OI) {
+ if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
+ if (!SE.isSCEVable(Oper->getType()))
+ continue;
+
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
+ if (AR->getLoop() == L)
+ break;
+ }
+ }
+ }
+ return OI;
+}
+
+/// getWideOperand - IVChain logic must consistently peek base TruncInst
+/// operands, so wrap it in a convenient helper.
+static Value *getWideOperand(Value *Oper) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
+ return Trunc->getOperand(0);
+ return Oper;
+}
+
+/// isCompatibleIVType - Return true if we allow an IV chain to include both
+/// types.
+static bool isCompatibleIVType(Value *LVal, Value *RVal) {
+ Type *LType = LVal->getType();
+ Type *RType = RVal->getType();
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+}
+
+/// getExprBase - Return an approximation of this SCEV expression's "base", or
+/// NULL for any constant. Returning the expression itself is
+/// conservative. Returning a deeper subexpression is more precise and valid as
+/// long as it isn't less complex than another subexpression. For expressions
+/// involving multiple unscaled values, we need to return the pointer-type
+/// SCEVUnknown. This avoids forming chains across objects, such as:
+/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+///
+/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
+/// SCEVUnknown, we simply return the rightmost SCEV operand.
+static const SCEV *getExprBase(const SCEV *S) {
+ switch (S->getSCEVType()) {
+  default: // including scUnknown.
+ return S;
+ case scConstant:
+ return 0;
+ case scTruncate:
+ return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
+ case scZeroExtend:
+ return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
+ case scSignExtend:
+ return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
+ case scAddExpr: {
+ // Skip over scaled operands (scMulExpr) to follow add operands as long as
+ // there's nothing more complex.
+ // FIXME: not sure if we want to recognize negation.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
+ E(Add->op_begin()); I != E; ++I) {
+ const SCEV *SubExpr = *I;
+ if (SubExpr->getSCEVType() == scAddExpr)
+ return getExprBase(SubExpr);
+
+ if (SubExpr->getSCEVType() != scMulExpr)
+ return SubExpr;
+ }
+ return S; // all operands are scaled, be conservative.
+ }
+ case scAddRecExpr:
+ return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
+ }
+}
+
+/// Return the profitable chain increment if expanding it into a loop-invariant
+/// value is worthwhile, or null otherwise; the increment may require its own
+/// register. A profitable chain increment will be an offset relative to the
+/// same base. We allow such offsets to be used as chain increments as long as
+/// they are not obviously expensive to expand using real instructions.
+static const SCEV *
+getProfitableChainIncrement(Value *NextIV, Value *PrevIV,
+ const IVChain &Chain, Loop *L,
+ ScalarEvolution &SE, const TargetLowering *TLI) {
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking first
+ // avoids creating extra SCEV expressions.
+ const SCEV *OperExpr = SE.getSCEV(NextIV);
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain)
+ return 0;
+
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
+ return 0;
+
+ // We are not able to expand an increment unless it is loop invariant,
+ // however, the following checks are purely for profitability.
+ if (StressIVChain)
+ return IncExpr;
+
+ // Do not replace a constant offset from IV head with a nonconstant IV
+ // increment.
+ if (!isa<SCEVConstant>(IncExpr)) {
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand));
+ if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
+ return 0;
+ }
+
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(IncExpr, Processed, SE))
+ return 0;
+
+ return IncExpr;
+}
+
+/// Return true if the number of registers needed for the chain is estimated to
+/// be less than the number required for the individual IV users. First prohibit
+/// any IV users that keep the IV live across increments (the Users set should
+/// be empty). Next count the number and type of increments in the chain.
+///
+/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
+/// effectively use postinc addressing modes. Only consider it profitable if the
+/// increments can be computed in fewer registers when chained.
+///
+/// TODO: Consider IVInc free if it's already used in other chains.
+static bool
+isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
+ ScalarEvolution &SE, const TargetLowering *TLI) {
+ if (StressIVChain)
+ return true;
+
+ if (Chain.size() <= 2)
+ return false;
+
+ if (!Users.empty()) {
+ DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n";
+ for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
+ E = Users.end(); I != E; ++I) {
+ dbgs() << " " << **I << "\n";
+ });
+ return false;
+ }
+ assert(!Chain.empty() && "empty IV chains are not allowed");
+
+  // The chain itself may require a register, so initialize cost to 1.
+ int cost = 1;
+
+ // A complete chain likely eliminates the need for keeping the original IV in
+ // a register. LSR does not currently know how to form a complete chain unless
+ // the header phi already exists.
+ if (isa<PHINode>(Chain.back().UserInst)
+ && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) {
+ --cost;
+ }
+ const SCEV *LastIncExpr = 0;
+ unsigned NumConstIncrements = 0;
+ unsigned NumVarIncrements = 0;
+ unsigned NumReusedIncrements = 0;
+ for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ I != E; ++I) {
+
+ if (I->IncExpr->isZero())
+ continue;
+
+ // Incrementing by zero or some constant is neutral. We assume constants can
+ // be folded into an addressing mode or an add's immediate operand.
+ if (isa<SCEVConstant>(I->IncExpr)) {
+ ++NumConstIncrements;
+ continue;
+ }
+
+ if (I->IncExpr == LastIncExpr)
+ ++NumReusedIncrements;
+ else
+ ++NumVarIncrements;
+
+ LastIncExpr = I->IncExpr;
+ }
+ // An IV chain with a single increment is handled by LSR's postinc
+ // uses. However, a chain with multiple increments requires keeping the IV's
+ // value live longer than it needs to be if chained.
+ if (NumConstIncrements > 1)
+ --cost;
+
+ // Materializing increment expressions in the preheader that didn't exist in
+ // the original code may cost a register. For example, sign-extended array
+ // indices can produce ridiculous increments like this:
+ // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+ cost += NumVarIncrements;
+
+ // Reusing variable increments likely saves a register to hold the multiple of
+ // the stride.
+ cost -= NumReusedIncrements;
+
+ DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n");
+
+ return cost < 0;
+}
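A worked instance of the cost model above, for a hypothetical chain of four accesses p[i], p[i+4], p[i+8], p[i+12] that terminates in the existing header phi:

    cost starts at 1                  (the chain itself may need a register)
    phi-terminated chain              cost -= 1
    more than one constant increment  cost -= 1
    no variable or reused increments  cost unchanged
    result: cost = -1 < 0, so the chain is kept and its increments are later
    materialized by GenerateIVChain instead of ordinary LSR formulae.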
+
+/// ChainInstruction - Add this IV user to an existing chain or make it the head
+/// of a new chain.
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec) {
+ // When IVs are used as types of varying widths, they are generally converted
+ // to a wider type with some uses remaining narrow under a (free) trunc.
+ Value *NextIV = getWideOperand(IVOper);
+
+ // Visit all existing chains. Check if its IVOper can be computed as a
+ // profitable loop invariant increment from the last link in the Chain.
+ unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ const SCEV *LastIncExpr = 0;
+ for (; ChainIdx < NChains; ++ChainIdx) {
+ Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand);
+ if (!isCompatibleIVType(PrevIV, NextIV))
+ continue;
+
+ // A phi node terminates a chain.
+ if (isa<PHINode>(UserInst)
+ && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
+ continue;
+
+ if (const SCEV *IncExpr =
+ getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx],
+ L, SE, TLI)) {
+ LastIncExpr = IncExpr;
+ break;
+ }
+ }
+ // If we haven't found a chain, create a new one, unless we hit the max. Don't
+ // bother for phi nodes, because they must be last in the chain.
+ if (ChainIdx == NChains) {
+ if (isa<PHINode>(UserInst))
+ return;
+ if (NChains >= MaxChains && !StressIVChain) {
+ DEBUG(dbgs() << "IV Chain Limit\n");
+ return;
+ }
+ LastIncExpr = SE.getSCEV(NextIV);
+ // IVUsers may have skipped over sign/zero extensions. We don't currently
+ // attempt to form chains involving extensions unless they can be hoisted
+ // into this loop's AddRec.
+ if (!isa<SCEVAddRecExpr>(LastIncExpr))
+ return;
+ ++NChains;
+ IVChainVec.resize(NChains);
+ ChainUsersVec.resize(NChains);
+ DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
+ << "\n");
+ }
+ else
+ DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr
+ << "\n");
+
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr));
+
+ SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
+ // This chain's NearUsers become FarUsers.
+ if (!LastIncExpr->isZero()) {
+ ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
+ NearUsers.end());
+ NearUsers.clear();
+ }
+
+ // All other uses of IVOperand become near uses of the chain.
+ // We currently ignore intermediate values within SCEV expressions, assuming
+  // they will eventually be used by the current chain, or can be computed
+ // from one of the chain increments. To be more precise we could
+ // transitively follow its user and only add leaf IV users to the set.
+ for (Value::use_iterator UseIter = IVOper->use_begin(),
+ UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
+ Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ if (!OtherUse || OtherUse == UserInst)
+ continue;
+ if (SE.isSCEVable(OtherUse->getType())
+ && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
+ && IU.isIVUserOrOperand(OtherUse)) {
+ continue;
+ }
+ NearUsers.insert(OtherUse);
+ }
+
+ // Since this user is part of the chain, it's no longer considered a use
+ // of the chain.
+ ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
+}
+
+/// CollectChains - Populate the vector of Chains.
+///
+/// This decreases ILP at the architecture level. Targets with ample registers,
+/// multiple memory ports, and no register renaming probably don't want
+/// this. However, such targets should probably disable LSR altogether.
+///
+/// The job of LSR is to make a reasonable choice of induction variables across
+/// the loop. Subsequent passes can easily "unchain" computation exposing more
+/// ILP *within the loop* if the target wants it.
+///
+/// Finding the best IV chain is potentially a scheduling problem. Since LSR
+/// will not reorder memory operations, it will recognize this as a chain, but
+/// will generate redundant IV increments. Ideally this would be corrected later
+/// by a smart scheduler:
+/// = A[i]
+/// = A[i+x]
+/// A[i] =
+/// A[i+x] =
+///
+/// TODO: Walk the entire domtree within this loop, not just the path to the
+/// loop latch. This will discover chains on side paths, but requires
+/// maintaining multiple copies of the Chains state.
+void LSRInstance::CollectChains() {
+ SmallVector<ChainUsers, 8> ChainUsersVec;
+
+ SmallVector<BasicBlock *,8> LatchPath;
+ BasicBlock *LoopHeader = L->getHeader();
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
+ Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
+ LatchPath.push_back(Rung->getBlock());
+ }
+ LatchPath.push_back(LoopHeader);
+
+ // Walk the instruction stream from the loop header to the loop latch.
+ for (SmallVectorImpl<BasicBlock *>::reverse_iterator
+ BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
+ BBIter != BBEnd; ++BBIter) {
+ for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
+ I != E; ++I) {
+ // Skip instructions that weren't seen by IVUsers analysis.
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ continue;
+
+ // Ignore users that are part of a SCEV expression. This way we only
+ // consider leaf IV Users. This effectively rediscovers a portion of
+ // IVUsers analysis but in program order this time.
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ continue;
+
+ // Remove this instruction from any NearUsers set it may be in.
+ for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ ChainIdx < NChains; ++ChainIdx) {
+ ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ }
+ // Search for operands that can be chained.
+ SmallPtrSet<Instruction*, 4> UniqueOperands;
+ User::op_iterator IVOpEnd = I->op_end();
+ User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ while (IVOpIter != IVOpEnd) {
+ Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
+ if (UniqueOperands.insert(IVOpInst))
+ ChainInstruction(I, IVOpInst, ChainUsersVec);
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ } // Continue walking down the instructions.
+ } // Continue walking down the domtree.
+ // Visit phi backedges to determine if the chain can generate the IV postinc.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()))
+ continue;
+
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (IncV)
+ ChainInstruction(PN, IncV, ChainUsersVec);
+ }
+ // Remove any unprofitable chains.
+ unsigned ChainIdx = 0;
+ for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
+ UsersIdx < NChains; ++UsersIdx) {
+ if (!isProfitableChain(IVChainVec[UsersIdx],
+ ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ continue;
+ // Preserve the chain at UsersIdx.
+ if (ChainIdx != UsersIdx)
+ IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
+ FinalizeChain(IVChainVec[ChainIdx]);
+ ++ChainIdx;
+ }
+ IVChainVec.resize(ChainIdx);
+}
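
For readers following the chain-collection logic above, here is a hypothetical source loop (not part of this patch) whose accesses match the A[i]/A[i+x] pattern in the comment: all four memory operations use the same induction variable, so CollectChains can record them as one chain head plus increments rather than as independent IV users.

    void swapPairs(int *A, unsigned N, unsigned X) {
      for (unsigned i = 0; i < N; ++i) {
        int a = A[i];      //        = A[i]
        int b = A[i + X];  //        = A[i+x]
        A[i] = b;          // A[i]   =
        A[i + X] = a;      // A[i+x] =
      }
    }
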
+
+void LSRInstance::FinalizeChain(IVChain &Chain) {
+ assert(!Chain.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n");
+
+ for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
+ User::op_iterator UseI =
+ std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
+ assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ IVIncSet.insert(UseI);
+ }
+}
+
+/// Return true if the IVInc can be folded into an addressing mode.
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
+ Value *Operand, const TargetLowering *TLI) {
+ const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
+ if (!IncConst || !isAddressUse(UserInst, Operand))
+ return false;
+
+ if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ return false;
+
+ int64_t IncOffset = IncConst->getValue()->getSExtValue();
+ if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
+ LSRUse::Address, getAccessType(UserInst), TLI))
+ return false;
+
+ return true;
+}
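
For intuition, canFoldIVIncExpr asks whether a chain increment is a small constant that the target can absorb into the memory operand itself. A source-level sketch of the distinction (hypothetical function; the outcome is target dependent):

    // The "+ 2" offset is a compile-time constant, so on targets whose
    // addressing modes accept [base + immediate] the access A[i + 2] needs no
    // separate IV increment; a run-time offset generally would not fold.
    int sumWithLookahead(const int *A, unsigned N) {
      int Sum = 0;
      for (unsigned i = 0; i + 2 < N; ++i)
        Sum += A[i] + A[i + 2];
      return Sum;
    }
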
+
+/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
+/// materialize the IV user's operand from the previous IV user's operand.
+void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ // Find the new IVOperand for the head of the chain. It may have been replaced
+ // by LSR.
+ const IVInc &Head = Chain[0];
+ User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
+ IVOpEnd, L, SE);
+ Value *IVSrc = 0;
+ while (IVOpIter != IVOpEnd) {
+ IVSrc = getWideOperand(*IVOpIter);
+
+ // If this operand computes the expression that the chain needs, we may use
+ // it. (Check this after setting IVSrc which is used below.)
+ //
+ // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
+ // narrow for the chain, so we can no longer use it. We do allow using a
+ // wider phi, assuming the LSR checked for free truncation. In that case we
+ // should already have a truncate on this operand such that
+ // getSCEV(IVSrc) == IncExpr.
+ if (SE.getSCEV(*IVOpIter) == Head.IncExpr
+ || SE.getSCEV(IVSrc) == Head.IncExpr) {
+ break;
+ }
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ if (IVOpIter == IVOpEnd) {
+ // Gracefully give up on this chain.
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ Type *IVTy = IVSrc->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(IVTy);
+ const SCEV *LeftOverExpr = 0;
+ for (IVChain::const_iterator IncI = llvm::next(Chain.begin()),
+ IncE = Chain.end(); IncI != IncE; ++IncI) {
+
+ Instruction *InsertPt = IncI->UserInst;
+ if (isa<PHINode>(InsertPt))
+ InsertPt = L->getLoopLatch()->getTerminator();
+
+ // IVOper will replace the current IV User's operand. IVSrc is the IV
+ // value currently held in a register.
+ Value *IVOper = IVSrc;
+ if (!IncI->IncExpr->isZero()) {
+ // IncExpr was the result of subtraction of two narrow values, so must
+ // be signed.
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ LeftOverExpr = LeftOverExpr ?
+ SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
+ }
+ if (LeftOverExpr && !LeftOverExpr->isZero()) {
+ // Expand the IV increment.
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
+ const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
+ SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+
+ // If an IV increment can't be folded, use it as the next IV value.
+ if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
+ TLI)) {
+ assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ IVSrc = IVOper;
+ LeftOverExpr = 0;
+ }
+ }
+ Type *OperTy = IncI->IVOperand->getType();
+ if (IVTy != OperTy) {
+ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
+ "cannot extend a chained IV");
+ IRBuilder<> Builder(InsertPt);
+ IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
+ }
+ IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
+ DeadInsts.push_back(IncI->IVOperand);
+ }
+ // If LSR created a new, wider phi, we may also replace its postinc. We only
+ // do this if we also found a wide value for the head of the chain.
+ if (isa<PHINode>(Chain.back().UserInst)) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ if (!isCompatibleIVType(Phi, IVSrc))
+ continue;
+ Instruction *PostIncV = dyn_cast<Instruction>(
+ Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
+ continue;
+ Value *IVOper = IVSrc;
+ Type *PostIncTy = PostIncV->getType();
+ if (IVTy != PostIncTy) {
+ assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
+ IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
+ Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
+ IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
+ }
+ Phi->replaceUsesOfWith(PostIncV, IVOper);
+ DeadInsts.push_back(PostIncV);
+ }
+ }
+}
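
A minimal sketch of what GenerateIVChain buys, expressed at the source level with hypothetical functions (the patch itself operates on IR): the unchained form keeps two induction variables that are both incremented every iteration, while the chained form derives the second address from the first, which is the materialization the pass performs with one add per IVInc.

    // Two IVs, two increments per iteration.
    void copyUnchained(float *A, unsigned N, unsigned X) {
      for (float *P = A, *Q = A + X; P != A + N; ++P, ++Q)
        *Q = *P;
    }

    // One IV; the second address is materialized from it by a fixed offset.
    void copyChained(float *A, unsigned N, unsigned X) {
      for (float *P = A; P != A + N; ++P)
        P[X] = *P;
    }
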
+
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ Instruction *UserInst = UI->getUser();
+ // Skip IV users that are part of profitable IV Chains.
+ User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
+ UI->getOperandValToReplace());
+ assert(UseI != UserInst->op_end() && "cannot find IV operand");
+ if (IVIncSet.count(UseI))
+ continue;
+
// Record the uses.
LSRFixup &LF = getNewFixup();
- LF.UserInst = UI->getUser();
+ LF.UserInst = UserInst;
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
@@ -2914,6 +3584,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +3604,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
@@ -3284,24 +3975,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
- bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
-retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
+ bool SatisfiedReqReg = true;
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
- F.BaseRegs.end())
- goto skip;
+ F.BaseRegs.end()) {
+ SatisfiedReqReg = false;
+ break;
+ }
+ }
+ if (!SatisfiedReqReg) {
+ // If none of the formulae satisfied the required registers, then we could
+ // clear ReqRegs and try again. Currently, we simply give up in this case.
+ continue;
}
- AnySatisfiedReqRegs = true;
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
@@ -3317,7 +4013,7 @@ retry:
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ". Regs:";
+ dbgs() << ".\n Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
@@ -3328,18 +4024,6 @@ retry:
}
Workspace.pop_back();
}
- skip:;
- }
-
- if (!EnableRetry && !AnySatisfiedReqRegs)
- return;
-
- // If none of the formulae had all of the required registers, relax the
- // constraint so that we don't exclude all formulae.
- if (!AnySatisfiedReqRegs) {
- assert(!ReqRegs.empty() && "Solver failed even without required registers");
- ReqRegs.clear();
- goto retry;
}
}
@@ -3435,9 +4119,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
- const LSRUse &LU) const {
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
@@ -3472,9 +4157,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
}
}
+ assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ && !isa<DbgInfoIntrinsic>(LowestIP) &&
+ "Insertion point must be a normal instruction");
+
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
- IP = HoistInsertPosition(IP, Inputs);
+ BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
@@ -3485,6 +4174,11 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+ // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+ // IP consistent across expansions and allows the previously inserted
+ // instructions to be reused by subsequent expansion.
+ while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
return IP;
}
@@ -3499,7 +4193,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
- IP = AdjustInsertPositionForExpand(IP, LF, LU);
+ IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@@ -3775,10 +4469,20 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
Rewriter.disableCanonicalMode();
Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+ // Mark phi nodes that terminate chains so the expander tries to reuse them.
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->back().UserInst))
+ Rewriter.setChainedPhi(PN);
+ }
+
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
@@ -3789,6 +4493,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed = true;
}
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ Changed = true;
+ }
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
@@ -3804,11 +4513,29 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm()) return;
+ if (!L->isLoopSimplifyForm())
+ return;
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
+#ifndef NDEBUG
+ // All dominating loops must have preheaders, or SCEVExpander may not be able
+ // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
+ //
+ // IVUsers analysis should only create users that are dominated by simple loop
+ // headers. Since this loop should dominate all of its users, its user list
+ // should be empty if this loop itself is not within a simple loop nest.
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *BB = Rung->getBlock();
+ const Loop *DomLoop = LI.getLoopFor(BB);
+ if (DomLoop && DomLoop->getHeader() == BB) {
+ assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
+ }
+ }
+#endif // NDEBUG
+
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
@@ -3821,24 +4548,18 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
- if (!EnableNested && !L->empty()) {
-
- if (EnablePhiElim) {
- // Remove any extra phis created by processing inner loops.
- SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
- Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
- Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
- }
+ if (!L->empty()) {
DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// Start collecting data and preparing for the solver.
+ CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
+ assert(!Uses.empty() && "IVUsers reported at least one use");
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
@@ -3875,14 +4596,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
-
- if (EnablePhiElim) {
- // Remove any extra phis created by processing inner loops.
- SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
- Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
- Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
- }
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -4008,9 +4721,21 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
- // At this point, it is worth checking to see if any recurrence PHIs are also
- // dead, so that we can remove them as well.
+ // Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
-
+ if (EnablePhiElim) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ unsigned numFolded = Rewriter.
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ if (numFolded) {
+ Changed = true;
+ DeleteTriviallyDeadInstructions(DeadInsts);
+ DeleteDeadPHIs(L->getHeader());
+ }
+ }
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 91395b2af6aa..09a186f7f940 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -101,6 +104,7 @@ INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
@@ -147,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default is different
+ // for run-time or compile-time trip count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -188,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = Threshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > Threshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -207,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
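
To see the new option end to end, consider a loop whose trip count is only known at run time (a hypothetical example; pass and option names are the ones defined above). A run such as `opt -loop-unroll -unroll-runtime` unrolls it by the default factor of 8, halving the count while the unrolled size exceeds -unroll-threshold, and leaves UnrollLoop to handle the leftover iterations.

    // Hypothetical candidate: N is not a compile-time constant, so the loop has
    // no constant trip count and was previously not unrolled.
    void scale(float *A, float S, unsigned N) {
      for (unsigned i = 0; i < N; ++i)
        A[i] *= S;
    }
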
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c8444d..ee232687ffde 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -48,6 +48,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <map>
#include <set>
using namespace llvm;
@@ -56,14 +57,70 @@ STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+STATISTIC(TotalInsts, "Total number of instructions analyzed");
-// The specific value of 50 here was chosen based only on intuition and a
+// The specific value of 100 here was chosen based only on intuition and a
// few specific examples.
static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(50), cl::Hidden);
-
+ cl::init(100), cl::Hidden);
+
namespace {
+
+ class LUAnalysisCache {
+
+ typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> >
+ UnswitchedValsMap;
+
+ typedef UnswitchedValsMap::iterator UnswitchedValsIt;
+
+ struct LoopProperties {
+ unsigned CanBeUnswitchedCount;
+ unsigned SizeEstimation;
+ UnswitchedValsMap UnswitchedVals;
+ };
+
+ // Here we use std::map instead of DenseMap, since we need to keep a valid
+ // LoopProperties pointer for the current loop for better performance.
+ typedef std::map<const Loop*, LoopProperties> LoopPropsMap;
+ typedef LoopPropsMap::iterator LoopPropsMapIt;
+
+ LoopPropsMap LoopsProperties;
+ UnswitchedValsMap* CurLoopInstructions;
+ LoopProperties* CurrentLoopProperties;
+
+ // Max size of code we can produce on the remaining iterations.
+ unsigned MaxSize;
+
+ public:
+
+ LUAnalysisCache() :
+ CurLoopInstructions(NULL), CurrentLoopProperties(NULL),
+ MaxSize(Threshold)
+ {}
+
+ // Analyze the loop: check its size and determine whether it is possible to
+ // unswitch it. Returns true if we can unswitch this loop.
+ bool countLoop(const Loop* L);
+
+ // Clean all data related to given loop.
+ void forgetLoop(const Loop* L);
+
+ // Mark case value as unswitched.
+ // Since a SwitchInst can be partially unswitched, keep track of all
+ // unswitched values to avoid extra unswitching in cloned loops.
+ void setUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Check whether this case value has been unswitched before.
+ bool isUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Clone all loop-unswitch related loop properties.
+ // Redistribute unswitching quotas.
+ // Note that the new loop data is stored inside the VMap.
+ void cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap);
+ };
+
class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
@@ -71,8 +128,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
-
+
+ LUAnalysisCache BranchesInfo;
+
bool OptimizeForSize;
bool redoLoop;
@@ -80,9 +138,9 @@ namespace {
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
-
+
// LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
// loop, in that order.
std::vector<BasicBlock*> LoopBlocks;
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
@@ -90,8 +148,8 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
- LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
@@ -117,7 +175,7 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ BranchesInfo.forgetLoop(currentLoop);
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -147,7 +205,7 @@ namespace {
Constant *Val, bool isEqual);
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
+ BasicBlock *TrueDest,
BasicBlock *FalseDest,
Instruction *InsertPt);
@@ -160,6 +218,112 @@ namespace {
};
}
+
+// Analyze the loop: check its size and determine whether it is possible to
+// unswitch it. Returns true if we can unswitch this loop.
+bool LUAnalysisCache::countLoop(const Loop* L) {
+
+ std::pair<LoopPropsMapIt, bool> InsertRes =
+ LoopsProperties.insert(std::make_pair(L, LoopProperties()));
+
+ LoopProperties& Props = InsertRes.first->second;
+
+ if (InsertRes.second) {
+ // New loop.
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(),
+ E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
+ Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
+ MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
+ }
+
+ if (!Props.CanBeUnswitchedCount) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", cost too high: "
+ << L->getBlocks().size() << "\n");
+
+ return false;
+ }
+
+ // Be careful: these links are valid only until a new loop is added.
+ CurrentLoopProperties = &Props;
+ CurLoopInstructions = &Props.UnswitchedVals;
+
+ return true;
+}
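
The quota arithmetic above is easy to misread, so here is a standalone sketch with hypothetical numbers (it mirrors the heuristic but is not part of the patch): with the default -loop-unswitch-threshold of 100, a loop of 20 instructions in 3 blocks receives a per-loop budget of 6 unswitches and consumes 90 units of the shared budget.

    #include <algorithm>
    #include <cassert>

    // Mirror of the countLoop heuristic, for illustration only.
    unsigned quotaFor(unsigned NumInsts, unsigned NumBlocks, unsigned &MaxSize) {
      unsigned SizeEstimation = std::min(NumInsts, NumBlocks * 5);
      unsigned CanBeUnswitched = MaxSize / SizeEstimation;
      MaxSize -= SizeEstimation * CanBeUnswitched; // reserve this loop's share
      return CanBeUnswitched;
    }

    int main() {
      unsigned MaxSize = 100;                // default -loop-unswitch-threshold
      assert(quotaFor(20, 3, MaxSize) == 6); // min(20, 15) = 15; 100 / 15 = 6
      assert(MaxSize == 10);                 // 100 - 15 * 6
      return 0;
    }
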
+
+// Clean all data related to given loop.
+void LUAnalysisCache::forgetLoop(const Loop* L) {
+
+ LoopPropsMapIt LIt = LoopsProperties.find(L);
+
+ if (LIt != LoopsProperties.end()) {
+ LoopProperties& Props = LIt->second;
+ MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
+ LoopsProperties.erase(LIt);
+ }
+
+ CurrentLoopProperties = NULL;
+ CurLoopInstructions = NULL;
+}
+
+// Mark case value as unswitched.
+// Since a SwitchInst can be partially unswitched, keep track of all
+// unswitched values to avoid extra unswitching in cloned loops.
+void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) {
+ (*CurLoopInstructions)[SI].insert(V);
+}
+
+// Check whether this case value has been unswitched before.
+bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
+ return (*CurLoopInstructions)[SI].count(V);
+}
+
+// Clone all loop-unswitch related loop properties.
+// Redistribute unswitching quotas.
+// Note that the new loop data is stored inside the VMap.
+void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap) {
+
+ LoopProperties& NewLoopProps = LoopsProperties[NewLoop];
+ LoopProperties& OldLoopProps = *CurrentLoopProperties;
+ UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals;
+
+ // Reallocate "can-be-unswitched quota"
+
+ --OldLoopProps.CanBeUnswitchedCount;
+ unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
+ NewLoopProps.CanBeUnswitchedCount = Quota / 2;
+ OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
+
+ NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
+
+ // Clone unswitched values info:
+ // for new loop switches we clone info about values that were
+ // already unswitched and have redundant successors.
+ for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
+ const SwitchInst* OldInst = I->first;
+ Value* NewI = VMap.lookup(OldInst);
+ const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI);
+ assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
+
+ NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
+ }
+}
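
And a companion sketch for the redistribution in cloneData, again with hypothetical numbers: one unswitch is charged for producing the clone, and the remaining budget is split roughly in half between the original and the new loop.

    #include <cassert>

    // Mirror of the quota split in cloneData, for illustration only.
    void splitQuota(unsigned &OldCount, unsigned &NewCount) {
      --OldCount;                   // the unswitch that produced the clone
      unsigned Quota = OldCount;
      NewCount = Quota / 2;
      OldCount = Quota - Quota / 2;
    }

    int main() {
      unsigned Old = 6, New = 0;
      splitQuota(Old, New);
      assert(Old == 3 && New == 2); // 6 -> 5, then split as 3 + 2
      return 0;
    }
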
+
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -169,14 +333,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
}
/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
/// invariant in the loop, or has an invariant piece, return the invariant.
/// Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+
+ // We started analyzing a new instruction; increment the scanned instructions counter.
+ ++TotalInsts;
+
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
return 0;
@@ -201,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
return RHS;
}
-
+
return 0;
}
@@ -226,16 +394,36 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// processCurrentLoop - Do actual work and unswitch loop if possible
/// and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
- LLVMContext &Context = currentLoop->getHeader()->getContext();
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ // Loops with indirectbr cannot be cloned.
+ if (!currentLoop->isSafeToClone())
+ return false;
+
+ // Without dedicated exits, splitting the exit edge may fail.
+ if (!currentLoop->hasDedicatedExits())
+ return false;
+
+ LLVMContext &Context = loopHeader->getContext();
+
+ // We may have reached the unswitching quota for this loop. If so,
+ // stop unswitching.
+ if (!BranchesInfo.countLoop(currentLoop))
+ return false;
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
- for (Loop::block_iterator I = currentLoop->block_begin(),
+ for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -244,24 +432,37 @@ bool LoopUnswitch::processCurrentLoop() {
if (BI->isConditional()) {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumBranches;
return true;
}
- }
+ }
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ Constant* UnswitchValCandidate = i.getCaseValue();
+ if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -270,14 +471,14 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
-
+
// Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
return true;
@@ -297,7 +498,8 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited. Without more analysis, this could indicate an infinte loop.
+ // Already visited. Without more analysis, this could indicate an infinite
+ // loop.
return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
@@ -306,7 +508,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
ExitBB = BB;
return true;
}
-
+
// Otherwise, this is an unvisited intra-loop node. Check all successors.
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
// Check to see if the successor is a trivial loop exit.
@@ -319,12 +521,12 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (I->mayHaveSideEffects())
return false;
-
+
return true;
}
/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
+/// leads to an exit from the specified loop, and has no side-effects in the
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
@@ -352,49 +554,61 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
BasicBlock *Header = currentLoop->getHeader();
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
-
+
BasicBlock *LoopExitBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(0)))) {
if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(1)))) {
if (Val) *Val = ConstantInt::getFalse(Context);
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
// If this isn't a switch on Cond, we can't handle it.
if (SI->getCondition() != Cond) return false;
-
+
// Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- SI->getSuccessor(i)))) {
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = i.getCaseValue();
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial values still look trivial.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
// contains phi nodes, this isn't trivial.
if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
return false; // Can't handle this.
-
+
if (LoopExit) *LoopExit = LoopExitBB;
-
+
// We already know that nothing uses any scalar values defined inside of this
// loop. As such, we just have to check to see if this loop will execute any
// side-effecting instructions (e.g. stores, calls, volatile loads) in the
@@ -411,12 +625,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
- initLoopData();
-
- // If LoopSimplify was unable to form a preheader, don't do any unswitching.
- if (!loopPreheader)
- return false;
-
Function *F = loopHeader->getParent();
Constant *CondVal = 0;
@@ -434,28 +642,6 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
return false;
- // FIXME: This is overly conservative because it does not take into
- // consideration code simplification opportunities and code that can
- // be shared by the resultant unswitched loops.
- CodeMetrics Metrics;
- for (Loop::block_iterator I = currentLoop->block_begin(),
- E = currentLoop->block_end();
- I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
-
- // Limit the number of instructions to avoid causing significant code
- // expansion, and the number of basic blocks, to avoid loops with
- // large numbers of branches which cause loop unswitching to go crazy.
- // This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
- Metrics.containsIndirectBr || Metrics.isRecursive) {
- DEBUG(dbgs() << "NOT unswitching loop %"
- << currentLoop->getHeader()->getName() << ", cost too high: "
- << currentLoop->getBlocks().size() << "\n");
- return false;
- }
-
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
return true;
}
@@ -508,17 +694,17 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
+/// where the path through the loop that doesn't execute its body has no
/// side-effects), unswitch it. This doesn't involve any code duplication, just
/// moving the conditional branch outside of the loop and updating loop info.
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
- Constant *Val,
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
BasicBlock *ExitBlock) {
DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << L->getHeader()->getParent()->getName()
<< " on cond: " << *Val << " == " << *Cond << "\n");
-
+
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
@@ -527,24 +713,24 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
// short-circuit to.
-
+
// Split this block now, so that the loop maintains its exit block, and so
// that the jump from the preheader can execute the contents of the exit block
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
-
- // Okay, now we have a position to branch from and a position to branch to,
+
+ // Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
- EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
loopPreheader->getTerminator());
LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
loopPreheader->getTerminator()->eraseFromParent();
// We need to reprocess this loop, it could be unswitched again.
redoLoop = true;
-
+
// Now that we know that the loop is never entered when this condition is a
// particular value, rewrite the loop with this info. We know that this will
// at least eliminate the old branch.
@@ -554,7 +740,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::SplitExitEdges(Loop *L,
+void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVector<BasicBlock *, 8> &ExitBlocks){
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
@@ -565,8 +751,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -575,10 +760,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equal Val. Split it into loop versions and test the
/// condition outside of either loop. Return the loops created as Out1/Out2.
-void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
Function *F = loopHeader->getParent();
DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
@@ -621,6 +806,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -633,6 +819,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Now we create the new Loop object for the versioned loop.
Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+
+ // Recalculate the unswitching quota and inherit simplified switch info for
+ // the new loop; clone other loop-unswitch related loop properties as well.
+ BranchesInfo.cloneData(NewLoop, L, VMap);
+
Loop *ParentLoop = L->getParentLoop();
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -645,7 +836,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
-
+
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
@@ -680,7 +871,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
-
+
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
@@ -699,7 +890,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the condition that we're unswitching on), we don't rewrite the second
// iteration.
WeakVH LICHandle(LIC);
-
+
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
@@ -714,7 +905,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
/// RemoveFromWorklist - Remove all instances of I from the worklist vector
/// specified.
-static void RemoveFromWorklist(Instruction *I,
+static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
Worklist.end(), I);
@@ -727,7 +918,7 @@ static void RemoveFromWorklist(Instruction *I,
/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
-static void ReplaceUsesOfWith(Instruction *I, Value *V,
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
@@ -760,10 +951,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
// If it has one pred, fold phi nodes in BB.
while (isa<PHINode>(BB->begin()))
- ReplaceUsesOfWith(BB->begin(),
- cast<PHINode>(BB->begin())->getIncomingValue(0),
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
Worklist, L, LPM);
-
+
// If this is the header of a loop and the only pred is the latch, we now
// have an unreachable loop.
if (Loop *L = LI->getLoopFor(BB))
@@ -774,15 +965,15 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
Pred->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), Pred);
-
+
// The loop is now broken, remove it from LI.
RemoveLoopFromHierarchy(L);
-
+
// Reprocess the header, which now IS dead.
RemoveBlockIfDead(BB, Worklist, L);
return;
}
-
+
// If pred ends in a uncond branch, add uncond branch to worklist so that
// the two blocks will get merged.
if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
@@ -793,11 +984,11 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
}
DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
+
// Remove the instructions in the basic block from the worklist.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
RemoveFromWorklist(I, Worklist);
-
+
// Anything that uses the instructions in this basic block should have their
// uses replaced with undefs.
// If I is not void type then replaceAllUsesWith undef.
@@ -805,7 +996,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (!I->getType()->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
-
+
// If this is the edge to the header block for a loop, remove the loop and
// promote all subloops.
if (Loop *BBLoop = LI->getLoopFor(BB)) {
@@ -821,8 +1012,8 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// Remove the block from the loop info, which removes it from any loops it
// was in.
LI->removeBlock(BB);
-
-
+
+
// Remove phi node entries in successors for this block.
TerminatorInst *TI = BB->getTerminator();
SmallVector<BasicBlock*, 4> Succs;
@@ -830,13 +1021,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
Succs.push_back(TI->getSuccessor(i));
TI->getSuccessor(i)->removePredecessor(BB);
}
-
+
// Unique the successors, remove anything with multiple uses.
array_pod_sort(Succs.begin(), Succs.end());
Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
+
// Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
+ LPM->deleteSimpleAnalysisValue(BB, L);
BB->eraseFromParent();
// Remove successor blocks here that are not dead, so that we know we only
// have dead blocks in this list. Nondead blocks have a way of becoming dead,
@@ -854,7 +1045,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
--i;
}
}
-
+
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
RemoveBlockIfDead(Succs[i], Worklist, L);
}
@@ -877,14 +1068,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
+
// FIXME: Support correlated properties, like:
// for (...)
// if (li1 < li2)
// ...
// if (li1 > li2)
// ...
-
+
// FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
// selects, switches.
std::vector<Instruction*> Worklist;
@@ -899,21 +1090,25 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (IsEqual)
Replacement = Val;
else
- Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
-
+
for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
UI != E; ++UI) {
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
-
+
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
@@ -925,23 +1120,27 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Worklist.push_back(U);
- // TODO: We could do other simplifications, for example, turning
+ // TODO: We could do other simplifications, for example, turning
// 'icmp eq LIC, Val' -> false.
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(U);
if (SI == 0 || !isa<ConstantInt>(Val)) continue;
-
- unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
- if (DeadCase == 0) continue; // Default case is live for multiple values.
-
- // Found a dead case value. Don't remove PHI nodes in the
+
+ SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ // Default case is live for multiple values.
+ if (DeadCase == SI->case_default()) continue;
+
+ // Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
// be in the Users list.
BasicBlock *Switch = SI->getParent();
- BasicBlock *SISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
+
+ BranchesInfo.setUnswitched(SI, Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -957,7 +1156,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
- BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
BasicBlock *OldSISucc = *succ_begin(NewSISucc);
// Create an "unreachable" destination.
BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
@@ -981,7 +1180,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (DT)
DT->addNewBlock(Abort, NewSISucc);
}
-
+
SimplifyCode(Worklist, L);
}
@@ -1002,7 +1201,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
-
+
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
@@ -1017,7 +1216,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
@@ -1034,24 +1233,24 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
<< Succ->getName() << "\n");
-
+
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
-
+
// If Succ has any successors with PHI nodes, update them to have
// entries coming from Pred instead of Succ.
Succ->replaceAllUsesWith(Pred);
-
+
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
-
+
// Remove Succ from the loop tree.
LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
@@ -1059,7 +1258,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
// Conditional branch. Turn it into an unconditional branch, then
// remove dead blocks.
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index eeb8931446dc..a87cce3f9d3e 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+  // If we found at least 4 stores to merge, or at least 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
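The hunk lowers the memset-forming thresholds from 8 stores / 64 bytes to 4 stores / 16 bytes. As a rough illustration (whether a memset is actually emitted still depends on the remaining heuristics in isProfitableToUseMemset and on TargetData), a source pattern like the following now qualifies:

void zero4(int *p) {
  // Four adjacent 4-byte stores: 4 stores covering 16 contiguous bytes,
  // which now meets the threshold and becomes a candidate for memset(p, 0, 16).
  p[0] = 0;
  p[1] = 0;
  p[2] = 0;
  p[3] = 0;
}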
@@ -806,21 +806,25 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
MemDepResult DepInfo = MD->getDependency(M);
- if (!DepInfo.isClobber())
- return false;
-
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
-
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- return true;
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
}
-
+
+ AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+ M, M->getParent());
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ }
+
return false;
}
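The rewritten logic splits the two transforms apart: the call-slot optimization still keys off the clobbering dependency of the memcpy itself, while the memcpy-memcpy forwarding now queries the dependency of the source location separately. A hypothetical source pattern showing why that matters (names are illustrative; whether the forwarding actually fires also depends on size and aliasing checks):

#include <cstring>

void copyThrough(char *dst, const char *src, char *other) {
  char tmp[64];
  std::memcpy(tmp, src, sizeof(tmp)); // the earlier memcpy (MDep)
  std::memset(other, 0, 32);          // may clobber dst, but not the local tmp
  std::memcpy(dst, tmp, sizeof(tmp)); // M: getDependency(M) sees the memset,
                                      // but M's *source* still depends on MDep
}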
@@ -945,7 +949,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemMove(M);
else if (CallSite CS = (Value*)I) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
- if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
}
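The CallSite change is a straight simplification: isByValArgument takes the zero-based argument index directly, folding in the +1 shift that paramHasAttr needs (index 0 being reserved for the return value in this attribute numbering). A minimal equivalence sketch, assuming CS is a CallSite and i a zero-based argument index:

bool Old = CS.paramHasAttr(i + 1, Attribute::ByVal); // 1-based attribute slot
bool New = CS.isByValArgument(i);                    // 0-based argument index
assert(Old == New && "the two queries agree");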
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index da74e9c3ec79..40b0b2061689 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -88,13 +88,14 @@ namespace {
}
#endif
- ValueT &operator[](KeyT Arg) {
+ ValueT &operator[](const KeyT &Arg) {
std::pair<typename MapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Arg, size_t(0)));
if (Pair.second) {
- Pair.first->second = Vector.size();
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector.back().second;
+ return Vector[Num].second;
}
return Vector[Pair.first->second].second;
}
@@ -104,14 +105,15 @@ namespace {
std::pair<typename MapTy::iterator, bool> Pair =
Map.insert(std::make_pair(InsertPair.first, size_t(0)));
if (Pair.second) {
- Pair.first->second = Vector.size();
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
Vector.push_back(InsertPair);
- return std::make_pair(llvm::prior(Vector.end()), true);
+ return std::make_pair(Vector.begin() + Num, true);
}
return std::make_pair(Vector.begin() + Pair.first->second, false);
}
- const_iterator find(KeyT Key) const {
+ const_iterator find(const KeyT &Key) const {
typename MapTy::const_iterator It = Map.find(Key);
if (It == Map.end()) return Vector.end();
return Vector.begin() + It->second;
@@ -121,7 +123,7 @@ namespace {
/// from the vector, it just zeros out the key in the vector. This leaves
/// iterators intact, but clients must be prepared for zeroed-out keys when
/// iterating.
- void blot(KeyT Key) {
+ void blot(const KeyT &Key) {
typename MapTy::iterator It = Map.find(Key);
if (It == Map.end()) return;
Vector[It->second].first = KeyT();
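These hunks touch a small map-plus-vector container used by the ARC optimizer; "blotting" an entry zeroes out its key in the vector instead of erasing it, so outstanding iterators stay valid. A rough usage sketch, assuming the container is instantiated as MapVector<Value *, RRInfo> (as the optimizer does for its Retains map), that it exposes the usual begin()/end() accessors, and that RetainInst is some Instruction* (illustration only):

MapVector<Value *, RRInfo> Retains;
Retains[RetainInst].Calls.insert(RetainInst); // operator[] inserts or updates
Retains.blot(RetainInst);                     // zero the key, keep the slot

for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
     E = Retains.end(); I != E; ++I) {
  if (!I->first)
    continue;                                 // blotted entries have null keys
  // ... process the live entry (I->second) ...
}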
@@ -179,9 +181,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never reference-counted; however, clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -371,7 +377,7 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
}
// Otherwise, be conservative.
- return IC_User;
+ return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
}
/// IsRetain - Test if the given class is objc_retain or
@@ -597,6 +603,46 @@ static bool ModuleHasARC(const Module &M) {
M.getNamedValue("objc_unretainedPointer");
}
+/// DoesObjCBlockEscape - Test whether the given pointer, which is an
+/// Objective C block pointer, "escapes". This differs from regular
+/// escape analysis in that a use as an argument to a call is not considered
+/// an escape.
+static bool DoesObjCBlockEscape(const Value *BlockPtr) {
+ // Walk the def-use chains.
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(BlockPtr);
+ do {
+ const Value *V = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const User *UUser = *UI;
+ // Special - Use by a call (callee or argument) is not considered
+ // to be an escape.
+ if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser))
+ continue;
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+ Worklist.push_back(UUser);
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ // Otherwise, conservatively assume an escape.
+ return true;
+ }
+ } while (!Worklist.empty());
+
+ // No escapes found.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// ARC AliasAnalysis.
//===----------------------------------------------------------------------===//
@@ -850,6 +896,139 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
}
//===----------------------------------------------------------------------===//
+// ARC autorelease pool elimination.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+
+namespace {
+ /// ObjCARCAPElim - Autorelease pool elimination.
+ class ObjCARCAPElim : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnModule(Module &M);
+
+ bool MayAutorelease(CallSite CS, unsigned Depth = 0);
+ bool OptimizeBB(BasicBlock *BB);
+
+ public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+ "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination",
+ false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+ return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+/// MayAutorelease - Interprocedurally determine if calls made by the
+/// given call site can possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(CallSite CS, unsigned Depth) {
+ if (Function *Callee = CS.getCalledFunction()) {
+ if (Callee->isDeclaration() || Callee->mayBeOverridden())
+ return true;
+ for (Function::iterator I = Callee->begin(), E = Callee->end();
+ I != E; ++I) {
+ BasicBlock *BB = I;
+ for (BasicBlock::iterator J = BB->begin(), F = BB->end(); J != F; ++J)
+ if (CallSite JCS = CallSite(J))
+ // This recursion depth limit is arbitrary. It's just great
+ // enough to cover known interesting testcases.
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(CallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs where the second members
+ // are constructor functions.
+ Function *F = cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
// ARC optimization.
//===----------------------------------------------------------------------===//
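The new ObjCARCAPElim module pass above removes objc_autoreleasePoolPush/Pop pairs inside single-block global constructors when nothing between them can autorelease. A minimal sketch of wiring it into a legacy PassManager via the factory added above, assuming the factory is declared alongside the other scalar-transform creators and that M is an existing Module (how the pass is scheduled in the default pipeline is outside this hunk):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

void runAPElim(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createObjCARCAPElimPass()); // strip redundant push/pop pairs
  PM.run(M);
}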
@@ -896,8 +1075,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1158,6 +1338,12 @@ namespace {
/// with the "tail" keyword.
bool IsTailCallRelease;
+    /// Partial - True if we've seen an opportunity for partial RR elimination,
+ /// such as pushing calls into a CFG triangle or into one side of a
+ /// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
+ bool Partial;
+
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
/// a clang.imprecise_release tag, this is the metadata tag.
MDNode *ReleaseMetadata;
@@ -1171,7 +1357,8 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false),
+ KnownSafe(false), IsRetainBlock(false),
+ IsTailCallRelease(false), Partial(false),
ReleaseMetadata(0) {}
void clear();
@@ -1182,6 +1369,7 @@ void RRInfo::clear() {
KnownSafe = false;
IsRetainBlock = false;
IsTailCallRelease = false;
+ Partial = false;
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
@@ -1239,16 +1427,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1272,8 +1450,16 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
Seq = S_None;
+ // If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
RRI.clear();
+ } else if (RRI.Partial || Other.RRI.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+    // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ Seq = S_None;
+ RRI.clear();
} else {
// Conservatively merge the ReleaseMetadata information.
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
@@ -1282,8 +1468,15 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
- RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
- Other.RRI.ReverseInsertPts.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ RRI.Partial = RRI.ReverseInsertPts.size() !=
+ Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ RRI.Partial |= RRI.ReverseInsertPts.insert(*I);
}
}
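The merge now flags "partial" merges by watching whether the two ReverseInsertPts sets differ, using the bool that SmallPtrSet::insert returns in this API generation (true when the element was newly inserted). The idiom in isolation, assuming two sets A and B of Instruction* (illustration only):

bool Partial = A.size() != B.size();              // different sizes => partial
for (SmallPtrSet<Instruction *, 2>::const_iterator I = B.begin(),
     E = B.end(); I != E; ++I)
  Partial |= A.insert(*I);                        // newly inserted => partial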
@@ -1460,6 +1653,14 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
+ /// metadata.
+ unsigned CopyOnEscapeMDKind;
+
+ /// NoObjCARCExceptionsMDKind - The Metadata Kind for
+ /// clang.arc.no_objc_arc_exceptions metadata.
+ unsigned NoObjCARCExceptionsMDKind;
+
Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
@@ -1467,6 +1668,8 @@ namespace {
Constant *getRetainBlockCallee(Module *M);
Constant *getAutoreleaseCallee(Module *M);
+ bool IsRetainBlockOptimizable(const Instruction *Inst);
+
void OptimizeRetainCall(Function &F, Instruction *Retain);
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
@@ -1475,9 +1678,16 @@ namespace {
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
@@ -1534,6 +1744,22 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+ // Without the magic metadata tag, we have to assume this might be an
+ // objc_retainBlock call inserted to convert a block pointer to an id,
+ // in which case it really is needed.
+ if (!Inst->getMetadata(CopyOnEscapeMDKind))
+ return false;
+
+ // If the pointer "escapes" (not including being used in a call),
+ // the copy may be needed.
+ if (DoesObjCBlockEscape(Inst))
+ return false;
+
+ // Otherwise, it's not needed.
+ return true;
+}
+
Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
if (!RetainRVCallee) {
LLVMContext &C = M->getContext();
@@ -1737,6 +1963,7 @@ namespace {
/// use here.
enum DependenceKind {
NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
CanChangeRetainCount,
RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
@@ -1766,6 +1993,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
}
}
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // These mark the end and begin of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
case CanChangeRetainCount: {
InstructionClass Class = GetInstructionClass(Inst);
switch (Class) {
@@ -1783,6 +2023,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
case RetainAutoreleaseDep:
switch (GetBasicInstructionClass(Inst)) {
case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
// Don't merge an objc_autorelease with an objc_retain inside a different
// autoreleasepool scope.
return true;
@@ -1794,7 +2035,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
// Nothing else matters for objc_retainAutorelease formation.
return false;
}
- break;
case RetainAutoreleaseRVDep: {
InstructionClass Class = GetBasicInstructionClass(Inst);
@@ -1808,7 +2048,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
// retainAutoreleaseReturnValue formation.
return CanInterruptRV(Class);
}
- break;
}
case RetainRVDep:
@@ -1816,7 +2055,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
}
llvm_unreachable("Invalid dependence flavor");
- return true;
}
/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
@@ -1920,17 +2158,26 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
/// return true.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
- // Check for the argument being from an immediately preceding call.
+ // Check for the argument being from an immediately preceding call or invoke.
Value *Arg = GetObjCArg(RetainRV);
CallSite CS(Arg);
- if (Instruction *Call = CS.getInstruction())
+ if (Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
BasicBlock::iterator I = Call;
++I;
while (isNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::iterator I = RetainRVParent->begin();
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
}
+ }
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
@@ -2144,9 +2391,34 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Check that there is nothing that cares about the reference
// count between the call and the phi.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
if (DependingInstructions.size() == 1 &&
*DependingInstructions.begin() == PN) {
Changed = true;
@@ -2186,7 +2458,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
BBState &MyStates) const {
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
- for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
E = MyStates.top_down_ptr_end(); I != E; ++I)
switch (I->second.GetSeq()) {
default: break;
@@ -2195,14 +2467,32 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
- switch (SuccS.GetSeq()) {
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has visited this successor, take what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
+ if (BBI != BBStates.end()) {
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ }
+ switch (SuccSSeq) {
case S_None:
case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
S.ClearSequenceProgress();
+ break;
+ }
continue;
}
case S_Use:
@@ -2211,7 +2501,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Stop:
case S_Release:
case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
break;
case S_Retain:
@@ -2223,19 +2513,38 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
- switch (SuccS.GetSeq()) {
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has visited this successor, take what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
+ if (BBI != BBStates.end()) {
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ }
+ switch (SuccSSeq) {
case S_None: {
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
S.ClearSequenceProgress();
+ break;
+ }
continue;
}
case S_CanRelease:
@@ -2245,7 +2554,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Release:
case S_MovableRelease:
case S_Use:
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
break;
case S_Retain:
@@ -2257,8 +2566,167 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+    // If we see two releases in a row on the same pointer, make
+    // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ break;
+ }
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
}
+ return NestingDetected;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.DecrementRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
}
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
}
bool
@@ -2274,7 +2742,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
succ_const_iterator SI(TI), SE(TI, false);
if (SI == SE)
MyStates.SetAsExit();
- else
+ else {
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
do {
const BasicBlock *Succ = *SI++;
if (Succ == BB)
@@ -2295,145 +2769,169 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
break;
} while (SI != SE);
+ }
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
Instruction *Inst = llvm::prior(I);
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
+ // Invoke instructions are visited as part of their successors (below).
+ if (isa<InvokeInst>(Inst))
+ continue;
+
+ NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+ }
+
+ // If there's a predecessor with an invoke, visit the invoke as
+ // if it were part of this block, since we can't insert code after
+ // an invoke in its own block, and we don't want to split critical
+ // edges.
+ for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PredTI = cast<TerminatorInst>(&Pred->back());
+ if (isa<InvokeInst>(PredTI))
+ NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates);
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
- // If we see two releases in a row on the same pointer. If so, make
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+      // If we see two retains in a row on the same pointer, make
      // a note, and we'll circle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
// Theoretically we could implement removal of nested retain+release
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ if (S.GetSeq() == S_Retain)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.SetSeq(S_Retain);
S.RRI.clear();
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
+ }
- S.IncrementRefCount();
- S.IncrementNestCount();
+ S.SetAtLeastOneRefCount();
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ return NestingDetected;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
}
- case IC_RetainBlock:
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
- S.SetAtLeastOneRefCount();
- S.DecrementNestCount();
-
- // An objc_retainBlock call with just a use still needs to be kept,
- // because it may be copying a block from the stack to the heap.
- if (Class == IC_RetainBlock && S.GetSeq() == S_Use)
+ switch (Seq) {
+ case S_Retain:
S.SetSeq(S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
- switch (S.GetSeq()) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
+ // One call can't cause a transition from S_Retain to S_CanRelease
+        // and from S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
case S_Use:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- Retains[Inst] = S.RRI;
- }
- S.ClearSequenceProgress();
- break;
case S_None:
break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- continue;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearBottomUpPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
+ case S_Stop:
case S_Release:
case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
+ llvm_unreachable("top-down pointer in release state!");
}
}
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
}
return NestingDetected;
@@ -2453,22 +2951,31 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
MyStates.SetAsEntry();
else
do {
- const BasicBlock *Pred = *PI++;
+ unsigned OperandNo = PI.getOperandNo();
+ const Use &Us = PI.getUse();
+ ++PI;
+
+ // Skip invoke unwind edges on invoke instructions marked with
+ // clang.arc.no_objc_arc_exceptions.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Us.getUser()))
+ if (OperandNo == II->getNumArgOperands() + 2 &&
+ II->getMetadata(NoObjCARCExceptionsMDKind))
+ continue;
+
+ const BasicBlock *Pred = cast<TerminatorInst>(Us.getUser())->getParent();
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
      // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I != BBStates.end() && I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2478,147 +2985,89 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ }
- switch (Class) {
- case IC_RetainBlock:
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
- PtrState &S = MyStates.getPtrTopDownState(Arg);
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.SetSeq(S_Retain);
- S.RRI.clear();
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not
- // sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
- S.RRI.Calls.insert(Inst);
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccStack.back().first->back());
+ succ_iterator End = succ_iterator(TI, true);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
}
-
- S.SetAtLeastOneRefCount();
- S.IncrementRefCount();
- S.IncrementNestCount();
- continue;
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
}
- case IC_Release: {
- Arg = GetObjCArg(Inst);
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementRefCount();
- S.DecrementNestCount();
-
- switch (S.GetSeq()) {
- case S_Retain:
- case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- break;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearTopDownPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
+ Visited.clear();
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ // This includes blocks where all the successors are backedges that
+ // we're skipping.
+ SmallVector<BasicBlock *, 4> Exits;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+ TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+ if (!Backedges.count(std::make_pair(BB, *SI)))
+ goto HasNonBackedgeSucc;
+ Exits.push_back(BB);
+ HasNonBackedgeSucc:;
+ }
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_Retain:
- // An objc_retainBlock call may be responsible for copying the block
- // data from the stack to the heap. Model this by moving it straight
- // from S_Retain to S_Use.
- if (S.RRI.IsRetainBlock &&
- CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- S.SetSeq(S_Use);
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
}
- break;
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
}
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
}
}
-
- CheckForCFGHazards(BB, BBStates, MyStates);
- return NestingDetected;
}
// Visit - Visit the function both top-down and bottom-up.
@@ -2627,43 +3076,29 @@ ObjCARCOpt::Visit(Function &F,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
- SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *BB = I;
- if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
- }
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -2691,40 +3126,26 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
getRetainBlockCallee(M) : getRetainCallee(M),
MyArg, "", InsertPt);
Call->setDoesNotThrow();
- if (!RetainsToMove.IsRetainBlock)
+ if (RetainsToMove.IsRetainBlock)
+ Call->setMetadata(CopyOnEscapeMDKind,
+ MDNode::get(M->getContext(), ArrayRef<Value *>()));
+ else
Call->setTailCall();
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *LastUse = *PI;
- Instruction *InsertPts[] = { 0, 0, 0 };
- if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
- // We can't insert code immediately after an invoke instruction, so
- // insert code at the beginning of both successor blocks instead.
- // The invoke's return value isn't available in the unwind block,
- // but our releases will never depend on it, because they must be
- // paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
- InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
- } else {
- // Insert code immediately after the last use.
- InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
- }
-
- for (Instruction **I = InsertPts; *I; ++I) {
- Instruction *InsertPt = *I;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
- // Attach a clang.imprecise_release metadata tag, if appropriate.
- if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
- Call->setDoesNotThrow();
- if (ReleasesToMove.IsTailCallRelease)
- Call->setTailCall();
- }
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
}
// Delete the original retain and release calls.
@@ -2765,17 +3186,11 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Instruction *Retain = cast<Instruction>(V);
Value *Arg = GetObjCArg(Retain);
- // If the object being released is in static storage, we know it's
+ // If the object being released is in static or stack storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg);
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
- // Same for stack storage, unless this is an objc_retainBlock call,
- // which is responsible for copying the block data from the stack to
- // the heap.
- if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg))
- KnownSafe = true;
-
// A constant pointer can't be pointing to an object on the heap. It may
// be reference-counted, but it won't be deleted.
if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
@@ -3091,7 +3506,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
@@ -3243,6 +3658,10 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// Identify the imprecise release metadata kind.
ImpreciseReleaseMDKind =
M.getContext().getMDKindID("clang.imprecise_release");
+ CopyOnEscapeMDKind =
+ M.getContext().getMDKindID("clang.arc.copy_on_escape");
+ NoObjCARCExceptionsMDKind =
+ M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
@@ -3344,6 +3763,11 @@ namespace {
/// RetainRV calls to make the optimization work on targets which need it.
const MDString *RetainRVMarker;
+ /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
+ /// at the end of walking the function we have found no alloca
+ /// instructions, these calls can be marked "tail".
+ DenseSet<CallInst *> StoreStrongCalls;
+
Constant *getStoreStrongCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
Constant *getRetainAutoreleaseRVCallee(Module *M);
@@ -3547,6 +3971,11 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
+ // We can't set the tail flag yet, because we haven't yet determined
+ // whether there are any escaping allocas. Remember this call, so that
+ // we can set the tail flag once we know it's safe.
+ StoreStrongCalls.insert(StoreStrong);
+
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
@@ -3593,6 +4022,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
PA.setAA(&getAnalysis<AliasAnalysis>());
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they are setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs = !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
+
// For ObjC library calls which return their argument, replace uses of the
// argument with uses of the call return value, if it dominates the use. This
// reduces register pressure.
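The contraction pass now defers the "tail" marking of the objc_storeStrong calls it creates: calls are remembered in a DenseSet while walking the function, and only marked tail at the end if no alloca, vararg, or returns-twice hazard was seen. The shape of that pattern, assuming F is the Function being processed and Calls the DenseSet<CallInst *> populated during the walk (sketch only):

bool TailOk = !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
// ... the main walk may clear TailOk (e.g. on seeing an alloca) and adds
//     each newly created objc_storeStrong call to Calls ...
if (TailOk)
  for (DenseSet<CallInst *>::iterator I = Calls.begin(), E = Calls.end();
       I != E; ++I)
    (*I)->setTailCall();
Calls.clear();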
@@ -3649,6 +4085,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
case IC_Release:
ContractRelease(Inst, I);
continue;
+ case IC_User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ continue;
default:
continue;
}
@@ -3666,36 +4109,37 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = UI.getUse();
unsigned OperandNo = UI.getOperandNo();
++UI; // Increment UI now, because we may unlink its element.
- if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
- if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
- Changed = true;
- Instruction *Replacement = Inst;
- Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
- // For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo =
- PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB =
- PHI->getIncomingBlock(ValNo);
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- &BB->back());
- for (unsigned i = 0, e = PHI->getNumIncomingValues();
- i != e; ++i)
- if (PHI->getIncomingBlock(i) == BB) {
- // Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
- ++UI;
- PHI->setIncomingValue(i, Replacement);
- }
- } else {
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
- U.set(Replacement);
- }
+ if (DT->isReachableFromEntry(U) &&
+ DT->dominates(Inst, U)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
+ U.set(Replacement);
}
+ }
}
// If Arg is a no-op casted pointer, strip one level of casts and
@@ -3713,5 +4157,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
}
}
+ // If this function has no escaping allocas or suspicious vararg usage,
+ // objc_storeStrong calls can be marked with the "tail" keyword.
+ if (TailOkForStoreStrongs)
+ for (DenseSet<CallInst *>::iterator I = StoreStrongCalls.begin(),
+ E = StoreStrongCalls.end(); I != E; ++I)
+ (*I)->setTailCall();
+ StoreStrongCalls.clear();
+
return Changed;
}
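
The hunks above defer the "tail" marking of the objc_storeStrong calls the contract pass creates: the calls are collected in StoreStrongCalls, the conservative flag is cleared whenever a vararg signature, a returns-twice callee, or an alloca is seen, and the marker is applied only at the end of the walk. A minimal standalone sketch of that pattern, assuming the LLVM 3.1-era headers of this import (the helper name is illustrative, not the pass's own):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseSet.h"
using namespace llvm;

static void markStoreStrongsTailIfSafe(Function &F,
                                       DenseSet<CallInst *> &StoreStrongCalls) {
  // Conservative preconditions: no varargs and no setjmp-like callees.
  bool TailOk = !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
  // Any alloca in the function also disables the "tail" marker.
  for (Function::iterator BB = F.begin(), E = F.end(); TailOk && BB != E; ++BB)
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
      if (isa<AllocaInst>(*I)) {
        TailOk = false;
        break;
      }
  if (!TailOk)
    return;
  // Safe: mark every recorded objc_storeStrong call as a tail call.
  for (DenseSet<CallInst *>::iterator I = StoreStrongCalls.begin(),
       E = StoreStrongCalls.end(); I != E; ++I)
    (*I)->setTailCall();
}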
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 8f98a5b6503e..cb408a137eab 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -74,7 +74,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
- DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
SmallVector<WeakVH, 8> RedoInsts;
SmallVector<WeakVH, 8> DeadInsts;
bool MadeChange;
@@ -210,7 +210,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -492,7 +492,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
@@ -517,8 +517,8 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
-static Instruction *ConvertShiftToMul(Instruction *Shl,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
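
The only change in this file is spelling out the default template argument: AssertingVH<> previously meant AssertingVH<Value>. The handle is a non-owning pointer wrapper that asserts (in +Asserts builds) if the tracked Value is deleted while a handle still refers to it, which is what makes it safe as a DenseMap key here. A tiny usage sketch under that assumption:

#include "llvm/Value.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/DenseMap.h"
using namespace llvm;

typedef DenseMap<AssertingVH<Value>, unsigned> RankMapTy;

static void noteRank(RankMapTy &ValueRankMap, Value *V, unsigned Rank) {
  // If V is later deleted while still in the map, the handle asserts,
  // turning a silent dangling-key bug into a hard failure.
  ValueRankMap[V] = Rank;
}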
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 196a847fc0ea..16b64a500b34 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -25,9 +25,9 @@
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,9 +39,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
-#include <map>
using namespace llvm;
STATISTIC(NumInstRemoved, "Number of instructions removed");
@@ -59,7 +57,7 @@ class LatticeVal {
enum LatticeValueTy {
/// undefined - This LLVM Value has no known value yet.
undefined,
-
+
/// constant - This LLVM Value has a specific constant value.
constant,
@@ -68,7 +66,7 @@ class LatticeVal {
/// with another (different) constant, it goes to overdefined, instead of
/// asserting.
forcedconstant,
-
+
/// overdefined - This instruction is not known to be constant, and we know
/// it has a value.
overdefined
@@ -77,30 +75,30 @@ class LatticeVal {
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'forcedconstant' value.
PointerIntPair<Constant *, 2, LatticeValueTy> Val;
-
+
LatticeValueTy getLatticeValue() const {
return Val.getInt();
}
-
+
public:
LatticeVal() : Val(0, undefined) {}
-
+
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
}
bool isOverdefined() const { return getLatticeValue() == overdefined; }
-
+
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
return Val.getPointer();
}
-
+
/// markOverdefined - Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
-
+
Val.setInt(overdefined);
return true;
}
@@ -111,17 +109,17 @@ public:
assert(getConstant() == V && "Marking constant with different value");
return false;
}
-
+
if (isUndefined()) {
Val.setInt(constant);
assert(V && "Marking constant with NULL");
Val.setPointer(V);
} else {
- assert(getLatticeValue() == forcedconstant &&
+ assert(getLatticeValue() == forcedconstant &&
"Cannot move from overdefined to constant!");
// Stay at forcedconstant if the constant is the same.
if (V == getConstant()) return false;
-
+
// Otherwise, we go to overdefined. Assumptions made based on the
// forced value are possibly wrong. Assuming this is another constant
// could expose a contradiction.
@@ -137,7 +135,7 @@ public:
return dyn_cast<ConstantInt>(getConstant());
return 0;
}
-
+
void markForcedConstant(Constant *V) {
assert(isUndefined() && "Can't force a defined value!");
Val.setInt(forcedconstant);
@@ -156,6 +154,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -163,7 +162,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// StructType, for example for formal arguments, calls, insertelement, etc.
///
DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
-
+
/// GlobalValue - If we are tracking any values for the contents of a global
/// variable, we keep a mapping from the constant accessor to the element of
/// the global, to the currently known value. If the value becomes
@@ -178,7 +177,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
-
+
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
/// represented here for efficient lookup.
SmallPtrSet<Function*, 16> MRVFunctionsTracked;
@@ -187,7 +186,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// arguments we make optimistic assumptions about and try to prove as
/// constants.
SmallPtrSet<Function*, 16> TrackingIncomingArguments;
-
+
/// The reason for two worklists is that overdefined is the lowest state
/// on the lattice, and moving things to overdefined as fast as possible
/// makes SCCP converge much faster.
@@ -201,16 +200,13 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
- /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
- /// overdefined, despite the fact that the PHI node is overdefined.
- std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
-
/// KnownFeasibleEdges - Entries in this set are edges which have already had
/// PHI nodes retriggered.
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -253,7 +249,7 @@ public:
void AddArgumentTrackedFunction(Function *F) {
TrackingIncomingArguments.insert(F);
}
-
+
/// Solve - Solve for constants and executable blocks.
///
void Solve();
@@ -274,9 +270,9 @@ public:
assert(I != ValueState.end() && "V is not in valuemap!");
return I->second;
}
-
+
/*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
- DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
@@ -308,7 +304,7 @@ public:
else
markOverdefined(V);
}
-
+
private:
// markConstant - Make a value be marked as "constant". If the value
// is not already a constant, add it to the instruction work list so that
@@ -322,7 +318,7 @@ private:
else
InstWorkList.push_back(V);
}
-
+
void markConstant(Value *V, Constant *C) {
assert(!V->getType()->isStructTy() && "Should use other method");
markConstant(ValueState[V], V, C);
@@ -338,14 +334,14 @@ private:
else
InstWorkList.push_back(V);
}
-
-
+
+
// markOverdefined - Make a value be marked as "overdefined". If the
// value is not already overdefined, add it to the overdefined instruction
// work list so that the users of the instruction are updated later.
void markOverdefined(LatticeVal &IV, Value *V) {
if (!IV.markOverdefined()) return;
-
+
DEBUG(dbgs() << "markOverdefined: ";
if (Function *F = dyn_cast<Function>(V))
dbgs() << "Function '" << F->getName() << "'\n";
@@ -365,7 +361,7 @@ private:
else if (IV.getConstant() != MergeWithV.getConstant())
markOverdefined(IV, V);
}
-
+
void mergeInValue(Value *V, LatticeVal MergeWithV) {
assert(!V->getType()->isStructTy() && "Should use other method");
mergeInValue(ValueState[V], V, MergeWithV);
@@ -390,7 +386,7 @@ private:
if (!isa<UndefValue>(V))
LV.markConstant(C); // Constants are constant
}
-
+
// All others are underdefined by default.
return LV;
}
@@ -412,21 +408,20 @@ private:
return LV; // Common case, already in the map.
if (Constant *C = dyn_cast<Constant>(V)) {
- if (isa<UndefValue>(C))
- ; // Undef values remain undefined.
- else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
- LV.markConstant(CS->getOperand(i)); // Constants are constant.
- else if (isa<ConstantAggregateZero>(C)) {
- Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
- LV.markConstant(Constant::getNullValue(FieldTy));
- } else
+ Constant *Elt = C->getAggregateElement(i);
+
+ if (Elt == 0)
LV.markOverdefined(); // Unknown sort of constant.
+ else if (isa<UndefValue>(Elt))
+ ; // Undef values remain undefined.
+ else
+ LV.markConstant(Elt); // Constants are constant.
}
-
+
// All others are underdefined by default.
return LV;
}
-
+
/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
/// work list if it is not already executable.
@@ -466,33 +461,6 @@ private:
if (BBExecutable.count(I->getParent())) // Inst is executable?
visit(*I);
}
-
- /// RemoveFromOverdefinedPHIs - If I has any entries in the
- /// UsersOfOverdefinedPHIs map for PN, remove them now.
- void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
- if (UsersOfOverdefinedPHIs.empty()) return;
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
- for (ItTy It = Range.first, E = Range.second; It != E;) {
- if (It->second == I)
- UsersOfOverdefinedPHIs.erase(It++);
- else
- ++It;
- }
- }
-
- /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
- /// map for I and PN, but if one is there already, do not create another.
- /// (Duplicate entries do not break anything directly, but can lead to
- /// exponential growth of the table in rare cases.)
- void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
- for (ItTy J = Range.first, E = Range.second; J != E; ++J)
- if (J->second == I)
- return;
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
- }
private:
friend class InstVisitor<SCCPSolver>;
@@ -559,7 +527,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs[0] = true;
return;
}
-
+
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
if (CI == 0) {
@@ -569,44 +537,44 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs[0] = Succs[1] = true;
return;
}
-
+
// Constant condition variables mean the branch can only go a single way.
Succs[CI->isZero()] = true;
return;
}
-
+
if (isa<InvokeInst>(TI)) {
// Invoke instructions successors are always executable.
Succs[0] = Succs[1] = true;
return;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
- if (TI.getNumSuccessors() < 2) {
+ if (!SI->getNumCases()) {
Succs[0] = true;
return;
}
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
-
+
if (CI == 0) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
- Succs[SI->findCaseValue(CI)] = true;
+
+ Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
return;
}
-
+
// TODO: This could be improved if the operand is a [cast of a] BlockAddress.
if (isa<IndirectBrInst>(&TI)) {
// Just mark all destinations executable!
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
+
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << TI << '\n';
#endif
@@ -628,7 +596,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional())
return true;
-
+
LatticeVal BCValue = getValueState(BI->getCondition());
// Overdefined condition variables mean the branch could go either way,
@@ -636,40 +604,33 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
ConstantInt *CI = BCValue.getConstantInt();
if (CI == 0)
return !BCValue.isUndefined();
-
+
// Constant condition variables mean the branch can only go a single way.
return BI->getSuccessor(CI->isZero()) == To;
}
-
+
// Invoke instructions successors are always executable.
if (isa<InvokeInst>(TI))
return true;
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getNumSuccessors() < 2)
+ if (SI->getNumCases() < 1)
return true;
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
-
+
if (CI == 0)
return !SCValue.isUndefined();
- // Make sure to skip the "default value" which isn't a value
- for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
- if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
- return SI->getSuccessor(i) == To;
-
- // If the constant value is not equal to any of the branches, we must
- // execute default branch.
- return SI->getDefaultDest() == To;
+ return SI->findCaseValue(CI).getCaseSuccessor() == To;
}
-
+
// Just mark all destinations executable!
// TODO: This could be improved if the operand is a [cast of a] BlockAddress.
if (isa<IndirectBrInst>(TI))
return true;
-
+
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
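
The switch handling above moves from raw successor indices to the case-iterator interface SwitchInst grew in this revision: getNumCases() counts only the explicit (non-default) cases, and findCaseValue() returns a CaseIt that can hand back the case value, the successor, or the successor index, falling back to the default case when nothing matches. A small sketch of that interface, with an illustrative helper name:

#include "llvm/Instructions.h"
using namespace llvm;

// Which block does this switch transfer control to for the constant CI?
static BasicBlock *resolveSwitchTarget(SwitchInst *SI, ConstantInt *CI) {
  // getNumCases() counts only the explicit cases, not the default.
  if (!SI->getNumCases())
    return SI->getDefaultDest();
  // findCaseValue() returns the default case's iterator when CI matches
  // none of the explicit case values.
  SwitchInst::CaseIt It = SI->findCaseValue(CI);
  return It.getCaseSuccessor();
}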
@@ -699,30 +660,15 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// TODO: We could do a lot better than this if code actually uses this.
if (PN.getType()->isStructTy())
return markAnythingOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined()) {
- // There may be instructions using this PHI node that are not overdefined
- // themselves. If so, make sure that they know that the PHI node operand
- // changed.
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN);
-
- if (Range.first == Range.second)
- return;
-
- SmallVector<Instruction*, 16> Users;
- for (ItTy I = Range.first, E = Range.second; I != E; ++I)
- Users.push_back(I->second);
- while (!Users.empty())
- visit(Users.pop_back_val());
+
+ if (getValueState(&PN).isOverdefined())
return; // Quick exit
- }
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return markOverdefined(&PN);
-
+
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
@@ -736,7 +682,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
continue;
-
+
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
@@ -744,11 +690,11 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
OperandVal = IV.getConstant();
continue;
}
-
+
// There is already a reachable operand. If we conflict with it,
// then the PHI node becomes overdefined. If we agree with it, we
// can continue on.
-
+
// Check to see if there are two different constants merging, if so, the PHI
// node is overdefined.
if (IV.getConstant() != OperandVal)
@@ -772,7 +718,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
Value *ResultOp = I.getOperand(0);
-
+
// If we are tracking the return value of this function, merge it in.
if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
@@ -782,7 +728,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
return;
}
}
-
+
// Handle functions that return multiple values.
if (!TrackedMultipleRetVals.empty()) {
if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
@@ -790,7 +736,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
getStructValueState(ResultOp, i));
-
+
}
}
@@ -811,7 +757,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
OpSt.getConstant(), I.getType()));
}
@@ -821,7 +767,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
// structs in structs.
if (EVI.getType()->isStructTy())
return markAnythingOverdefined(&EVI);
-
+
// If this is extracting from more than one level of struct, we don't know.
if (EVI.getNumIndices() != 1)
return markOverdefined(&EVI);
@@ -841,15 +787,15 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
if (STy == 0)
return markOverdefined(&IVI);
-
+
// If this has more than one index, we can't handle it, drive all results to
// undef.
if (IVI.getNumIndices() != 1)
return markAnythingOverdefined(&IVI);
-
+
Value *Aggr = IVI.getAggregateOperand();
unsigned Idx = *IVI.idx_begin();
-
+
// Compute the result based on what we're inserting.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
// This passes through all values that aren't the inserted element.
@@ -858,7 +804,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
continue;
}
-
+
Value *Val = IVI.getInsertedValueOperand();
if (Val->getType()->isStructTy())
// We don't track structs in structs.
@@ -875,25 +821,25 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
// TODO: We could do a lot better than this if code actually uses this.
if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
-
+
LatticeVal CondValue = getValueState(I.getCondition());
if (CondValue.isUndefined())
return;
-
+
if (ConstantInt *CondCB = CondValue.getConstantInt()) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
mergeInValue(&I, getValueState(OpVal));
return;
}
-
+
// Otherwise, the condition is overdefined or a constant we can't evaluate.
// See if we can produce something better than overdefined based on the T/F
// value.
LatticeVal TVal = getValueState(I.getTrueValue());
LatticeVal FVal = getValueState(I.getFalseValue());
-
+
// select ?, C, C -> C.
- if (TVal.isConstant() && FVal.isConstant() &&
+ if (TVal.isConstant() && FVal.isConstant() &&
TVal.getConstant() == FVal.getConstant())
return markConstant(&I, FVal.getConstant());
@@ -908,7 +854,7 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal V1State = getValueState(I.getOperand(0));
LatticeVal V2State = getValueState(I.getOperand(1));
-
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
@@ -916,14 +862,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
return markConstant(IV, &I,
ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
V2State.getConstant()));
-
+
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
return;
-
+
// Otherwise, one of our operands is overdefined. Try to produce something
// better than overdefined with some tricks.
-
+
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
@@ -945,7 +891,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
Constant::getAllOnesValue(I.getType()));
return;
}
-
+
if (I.getOpcode() == Instruction::And) {
// X and 0 = 0
if (NonOverdefVal->getConstant()->isNullValue())
@@ -959,64 +905,6 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
}
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
- Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- }
-
- if (In1.isConstant() && In2.isConstant()) {
- Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
- }
- }
-
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(IV, &I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- InsertInOverdefinedPHIs(&I, PN1);
- InsertInOverdefinedPHIs(&I, PN2);
- return;
- }
-
- if (Result.isUndefined())
- return;
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- RemoveFromOverdefinedPHIs(&I, PN1);
- RemoveFromOverdefinedPHIs(&I, PN2);
- }
-
markOverdefined(&I);
}
@@ -1029,75 +917,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
if (IV.isOverdefined()) return;
if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
V2State.getConstant()));
-
+
// If operands are still undefined, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
return;
-
- // If something is overdefined, use some tricks to avoid ending up and over
- // defined if we can.
-
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
- Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- }
-
- if (In1.isConstant() && In2.isConstant()) {
- Constant *V = ConstantExpr::getCompare(I.getPredicate(),
- In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
- }
- }
-
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(&I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- InsertInOverdefinedPHIs(&I, PN1);
- InsertInOverdefinedPHIs(&I, PN2);
- return;
- }
-
- if (Result.isUndefined())
- return;
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- RemoveFromOverdefinedPHIs(&I, PN1);
- RemoveFromOverdefinedPHIs(&I, PN2);
- }
markOverdefined(&I);
}
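
The two blocks deleted above implemented a speculative refinement: a binary operator or compare whose operands are PHI nodes in the same block could still be constant if folding each pair of incoming values gives the same result, and UsersOfOverdefinedPHIs tracked which instructions relied on that guess. This revision drops the bookkeeping and lets such instructions simply go overdefined. For reference, the core of the removed idea looks roughly like this in isolation (a sketch over raw constants only, ignoring the solver's lattice and worklists):

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Returns the folded constant if op(PN1, PN2) is the same for every shared
// predecessor, or null otherwise.
static Constant *foldBinOpOverPHIs(unsigned Opcode, PHINode *PN1, PHINode *PN2) {
  if (PN1->getParent() != PN2->getParent())
    return 0;
  Constant *Result = 0;
  for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
    Constant *C1 = dyn_cast<Constant>(PN1->getIncomingValue(i));
    Constant *C2 = dyn_cast<Constant>(
        PN2->getIncomingValueForBlock(PN1->getIncomingBlock(i)));
    if (!C1 || !C2)
      return 0;                 // a non-constant incoming value: give up
    Constant *V = ConstantExpr::get(Opcode, C1, C2);
    if (!Result)
      Result = V;
    else if (Result != V)
      return 0;                 // two predecessors disagree: not constant
  }
  return Result;
}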
@@ -1135,7 +961,7 @@ void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
EltState.getConstant(),
IdxState.getConstant()));
else if (ValState.isUndefined() && EltState.isConstant() &&
- IdxState.isConstant())
+ IdxState.isConstant())
markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
EltState.getConstant(),
IdxState.getConstant()));
@@ -1153,17 +979,17 @@ void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
if (MaskState.isUndefined() ||
(V1State.isUndefined() && V2State.isUndefined()))
return; // Undefined output if mask or both inputs undefined.
-
+
if (V1State.isOverdefined() || V2State.isOverdefined() ||
MaskState.isOverdefined()) {
markOverdefined(&I);
} else {
// A mix of constant/undef inputs.
- Constant *V1 = V1State.isConstant() ?
+ Constant *V1 = V1State.isConstant() ?
V1State.getConstant() : UndefValue::get(I.getType());
- Constant *V2 = V2State.isConstant() ?
+ Constant *V2 = V2State.isConstant() ?
V2State.getConstant() : UndefValue::get(I.getType());
- Constant *Mask = MaskState.isConstant() ?
+ Constant *Mask = MaskState.isConstant() ?
MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
}
@@ -1183,7 +1009,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
LatticeVal State = getValueState(I.getOperand(i));
if (State.isUndefined())
return; // Operands are not resolved yet.
-
+
if (State.isOverdefined())
return markOverdefined(&I);
@@ -1200,10 +1026,10 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
// If this store is of a struct, ignore it.
if (SI.getOperand(0)->getType()->isStructTy())
return;
-
+
if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
return;
-
+
GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
@@ -1221,22 +1047,22 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
-
+
LatticeVal PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
-
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
if (!PtrVal.isConstant() || I.isVolatile())
return markOverdefined(IV, &I);
-
+
Constant *Ptr = PtrVal.getConstant();
// load null -> null
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
return markConstant(IV, &I, Constant::getNullValue(I.getType()));
-
+
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
if (!TrackedGlobals.empty()) {
@@ -1262,7 +1088,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
void SCCPSolver::visitCallSite(CallSite CS) {
Function *F = CS.getCalledFunction();
Instruction *I = CS.getInstruction();
-
+
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
@@ -1270,17 +1096,17 @@ void SCCPSolver::visitCallSite(CallSite CS) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
-
+
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
canConstantFoldCallTo(F)) {
-
+
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
LatticeVal State = getValueState(*AI);
-
+
if (State.isUndefined())
return; // Operands are not resolved yet.
if (State.isOverdefined())
@@ -1288,10 +1114,10 @@ CallOverdefined:
assert(State.isConstant() && "Unknown state!");
Operands.push_back(State.getConstant());
}
-
+
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
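
The surrounding hunks thread a TargetLibraryInfo through the solver so ConstantFoldCall can fold library calls with target knowledge. A reduced sketch of the same plumbing in a throwaway pass; the pass name and structure are illustrative, only the addRequired<TargetLibraryInfo>() and ConstantFoldCall(F, Ops, TLI) shape mirrors the change:

#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

namespace {
struct FoldLibCallsSketch : public FunctionPass {
  static char ID;
  FoldLibCallsSketch() : FunctionPass(ID) {}
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetLibraryInfo>();   // mirrors the SCCP/IPSCCP change
  }
  virtual bool runOnFunction(Function &Fn) {
    const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
    bool Changed = false;
    for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
        Instruction *Inst = I++;
        CallInst *CI = dyn_cast<CallInst>(Inst);
        if (!CI) continue;
        Function *Callee = CI->getCalledFunction();
        if (!Callee || !Callee->isDeclaration() ||
            !canConstantFoldCallTo(Callee))
          continue;
        SmallVector<Constant*, 8> Ops;
        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
          if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(i)))
            Ops.push_back(C);
        if (Ops.size() != CI->getNumArgOperands())
          continue;                        // some argument is not constant
        if (Constant *C = ConstantFoldCall(Callee, Ops, TLI)) {
          CI->replaceAllUsesWith(C);
          CI->eraseFromParent();
          Changed = true;
        }
      }
    return Changed;
  }
};
}
char FoldLibCallsSketch::ID = 0;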
@@ -1304,7 +1130,7 @@ CallOverdefined:
// the formal arguments of the function.
if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
MarkBlockExecutable(F->begin());
-
+
// Propagate information from this call site into the callee.
CallSite::arg_iterator CAI = CS.arg_begin();
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
@@ -1315,7 +1141,7 @@ CallOverdefined:
markOverdefined(AI);
continue;
}
-
+
if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
@@ -1326,22 +1152,22 @@ CallOverdefined:
}
}
}
-
+
// If this is a single/zero retval case, see if we're tracking the function.
if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
if (!MRVFunctionsTracked.count(F))
goto CallOverdefined; // Not tracking this callee.
-
+
// If we are tracking this callee, propagate the result of the function
// into this call site.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- mergeInValue(getStructValueState(I, i), I,
+ mergeInValue(getStructValueState(I, i), I,
TrackedMultipleRetVals[std::make_pair(F, i)]);
} else {
DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
if (TFRVI == TrackedRetVals.end())
goto CallOverdefined; // Not tracking this callee.
-
+
// If so, propagate the return value of the callee into this call result.
mergeInValue(I, TFRVI->second);
}
@@ -1370,7 +1196,7 @@ void SCCPSolver::Solve() {
if (Instruction *I = dyn_cast<Instruction>(*UI))
OperandChangedState(I);
}
-
+
// Process the instruction work list.
while (!InstWorkList.empty()) {
Value *I = InstWorkList.pop_back_val();
@@ -1427,11 +1253,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
continue;
-
+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// Look for instructions which produce undef values.
if (I->getType()->isVoidTy()) continue;
-
+
if (StructType *STy = dyn_cast<StructType>(I->getType())) {
// Only a few things that can be structs matter for undef.
@@ -1442,7 +1268,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
continue;
// extractvalue and insertvalue don't need to be marked; they are
- // tracked as precisely as their operands.
+ // tracked as precisely as their operands.
if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
continue;
@@ -1549,12 +1375,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X / undef -> undef. No change.
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
-
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
-
+
case Instruction::AShr:
// X >>a undef -> undef.
if (Op1LV.isUndefined()) break;
@@ -1587,7 +1413,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
} else {
// Leave Op1LV as Operand(1)'s LatticeValue.
}
-
+
if (Op1LV.isConstant())
markForcedConstant(I, Op1LV.getConstant());
else
@@ -1627,7 +1453,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
}
}
-
+
// Check to see if we have a branch or switch on an undefined value. If so
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
@@ -1636,7 +1462,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUndefined())
continue;
-
+
// If the input to SCCP is actually branch on undef, fix the undef to
// false.
if (isa<UndefValue>(BI->getCondition())) {
@@ -1644,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
markEdgeExecutable(BB, TI->getSuccessor(1));
return true;
}
-
+
// Otherwise, it is a branch on a symbolic value which is currently
// considered to be undef. Handle this by forcing the input value to the
// branch to false.
@@ -1652,22 +1478,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
ConstantInt::getFalse(TI->getContext()));
return true;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getNumSuccessors() < 2) // no cases
+ if (!SI->getNumCases())
continue;
if (!getValueState(SI->getCondition()).isUndefined())
continue;
-
+
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->getCaseValue(1));
- markEdgeExecutable(BB, TI->getSuccessor(1));
+ SI->setCondition(SI->case_begin().getCaseValue());
+ markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
return true;
}
-
- markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+
+ markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
return true;
}
}
@@ -1683,6 +1509,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1735,7 +1564,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1764,7 +1595,7 @@ bool SCCP::runOnFunction(Function &F) {
MadeChanges = true;
continue;
}
-
+
// Iterate over all of the instructions in a function, replacing them with
// constants if we have found them to be of constant values.
//
@@ -1772,25 +1603,25 @@ bool SCCP::runOnFunction(Function &F) {
Instruction *Inst = BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
-
+
// TODO: Reconstruct structs from their elements.
if (Inst->getType()->isStructTy())
continue;
-
+
LatticeVal IV = Solver.getLatticeValueFor(Inst);
if (IV.isOverdefined())
continue;
-
+
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the constant.
Inst->replaceAllUsesWith(Const);
-
+
// Delete the instruction.
Inst->eraseFromParent();
-
+
// Hey, we just changed something!
MadeChanges = true;
++NumInstRemoved;
@@ -1807,6 +1638,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1816,7 +1650,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1855,7 +1693,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
@@ -1863,19 +1703,19 @@ bool IPSCCP::runOnModule(Module &M) {
// address-taken-ness. Because of this, we keep track of their addresses from
// the first pass so we can use them for the later simplification pass.
SmallPtrSet<Function*, 32> AddressTakenFunctions;
-
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration())
continue;
-
+
// If this is a strong or ODR definition of this function, then we can
// propagate information about its result into callsites of it.
if (!F->mayBeOverridden())
Solver.AddTrackedFunction(F);
-
+
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
@@ -1890,7 +1730,7 @@ bool IPSCCP::runOnModule(Module &M) {
// Assume the function is called.
Solver.MarkBlockExecutable(F->begin());
-
+
// Assume nothing about the incoming arguments.
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI)
@@ -1928,17 +1768,17 @@ bool IPSCCP::runOnModule(Module &M) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
if (AI->use_empty() || AI->getType()->isStructTy()) continue;
-
+
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
LatticeVal IV = Solver.getLatticeValueFor(AI);
if (IV.isOverdefined()) continue;
-
+
Constant *CST = IV.isConstant() ?
IV.getConstant() : UndefValue::get(AI->getType());
DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
-
+
// Replaces all of the uses of a variable with uses of the
// constant.
AI->replaceAllUsesWith(CST);
@@ -1967,19 +1807,19 @@ bool IPSCCP::runOnModule(Module &M) {
new UnreachableInst(M.getContext(), BB);
continue;
}
-
+
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
-
+
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
-
+
LatticeVal IV = Solver.getLatticeValueFor(Inst);
if (IV.isOverdefined())
continue;
-
+
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
@@ -1987,7 +1827,7 @@ bool IPSCCP::runOnModule(Module &M) {
// Replaces all of the uses of a variable with uses of the
// constant.
Inst->replaceAllUsesWith(Const);
-
+
// Delete the instruction.
if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
Inst->eraseFromParent();
@@ -2029,15 +1869,15 @@ bool IPSCCP::runOnModule(Module &M) {
llvm_unreachable("Didn't fold away reference to block!");
}
#endif
-
+
// Make this an uncond branch to the first successor.
TerminatorInst *TI = I->getParent()->getTerminator();
BranchInst::Create(TI->getSuccessor(0), TI);
-
+
// Remove entries in successor phi nodes to remove edges.
for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
TI->getSuccessor(i)->removePredecessor(TI->getParent());
-
+
// Remove the old terminator.
TI->eraseFromParent();
}
@@ -2060,7 +1900,7 @@ bool IPSCCP::runOnModule(Module &M) {
// last use of a function, the order of processing functions would affect
// whether other functions are optimizable.
SmallVector<ReturnInst*, 8> ReturnsToZap;
-
+
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
@@ -2068,11 +1908,11 @@ bool IPSCCP::runOnModule(Module &M) {
Function *F = I->first;
if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
continue;
-
+
// We can only do this if we know that nothing else can call the function.
if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
-
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
@@ -2084,9 +1924,9 @@ bool IPSCCP::runOnModule(Module &M) {
Function *F = ReturnsToZap[i]->getParent()->getParent();
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
-
- // If we inferred constant or undef values for globals variables, we can delete
- // the global and any stores that remain to it.
+
+  // If we inferred constant or undef values for global variables, we can
+ // delete the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
E = TG.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index f6918deafebd..7d65bcc064e1 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -51,6 +51,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAPElimPass(Registry);
initializeObjCARCExpandPass(Registry);
initializeObjCARCContractPass(Registry);
initializeObjCARCOptPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c6d9123d6611..026fea117b20 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -13,7 +13,7 @@
// each member (if possible). Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
-// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
// often interact, especially for C++ programs. As such, iterating between
// SRoA, then Mem2Reg until we run out of things to promote works well.
//
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
@@ -572,8 +574,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
- unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if (NumBytes != 0) {
+ int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+ if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
+ unsigned NumBytes = static_cast<unsigned>(SNumBytes);
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
// Compute the value replicated the right number of times.
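
The new guard above accepts a memset length only if it is positive and fits in 32 bits before narrowing it to unsigned, where the old code would happily truncate a negative or huge constant. In isolation the check looks roughly like this (illustrative helper, 3.1-era headers assumed):

#include "llvm/IntrinsicInst.h"
using namespace llvm;

static bool getSmallMemSetLength(const MemSetInst *MSI, unsigned &NumBytes) {
  const ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
  if (!Len)
    return false;
  int64_t SNumBytes = Len->getSExtValue();
  if (SNumBytes <= 0 || (SNumBytes >> 32) != 0)
    return false;               // negative, zero, or does not fit in 32 bits
  NumBytes = static_cast<unsigned>(SNumBytes);
  return true;
}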
@@ -806,8 +809,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
- assert(SV->getType() == VTy->getElementType());
- uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Type *EltTy = VTy->getElementType();
+ if (SV->getType() != EltTy)
+ SV = Builder.CreateBitCast(SV, EltTy);
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
@@ -934,13 +939,14 @@ public:
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
- if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) {
for (Value::use_iterator UI = DebugNode->use_begin(),
E = DebugNode->use_end(); UI != E; ++UI)
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
DVIs.push_back(DVI);
+ }
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
@@ -975,30 +981,25 @@ public:
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
+ Value *Arg = NULL;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
- Argument *ExtendedArg = NULL;
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ Arg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg)
- DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
- DIVariable(DVI->getVariable()),
- SI);
- else
- DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
- DIVariable(DVI->getVariable()),
- SI);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ Arg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (!Arg)
+ Arg = SI->getOperand(0);
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Instruction *DbgVal =
- DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
- DIVariable(DVI->getVariable()), LI);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ Arg = LI->getOperand(0);
+ } else {
+ continue;
}
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ Inst);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
}
}
};
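
The rewritten debug-info updater above computes a single Arg value for both the store and load cases and then funnels it through one insertDbgValueIntrinsic call. A minimal sketch of that call shape with the DIBuilder API of this era (the helper name is illustrative):

#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;

static void emitDbgValueFor(DIBuilder &DIB, DbgValueInst *DVI, Value *Arg,
                            Instruction *InsertBefore) {
  // Emit llvm.dbg.value for Arg, reusing the variable and location of the
  // dbg.value that described the promoted alloca.
  Instruction *DbgVal =
      DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
                                  InsertBefore);
  DbgVal->setDebugLoc(DVI->getDebugLoc());
}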
@@ -1517,6 +1518,9 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length == 0)
return MarkUnsafe(Info, User);
+ if (Length->isNegative())
+ return MarkUnsafe(Info, User);
+
isSafeMemAccess(Offset, Length->getZExtValue(), 0,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
@@ -1873,8 +1877,14 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
return;
// The bitcast references the original alloca. Replace its uses with
- // references to the first new element alloca.
- Instruction *Val = NewElts[0];
+ // references to the alloca containing offset zero (which is normally at
+ // index zero, but might not be in cases involving structs with elements
+ // of size zero).
+ Type *T = AI->getAllocatedType();
+ uint64_t EltOffset = 0;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ Instruction *Val = NewElts[Idx];
if (Val->getType() != BC->getDestTy()) {
Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
Val->takeName(BC);
@@ -2146,8 +2156,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the requested value was a vector constant, create it.
if (EltTy->isVectorTy()) {
unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
- SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(Elts);
+ StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
}
}
new StoreInst(StoreVal, EltPtr, MI);
@@ -2158,6 +2167,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
unsigned EltSize = TD->getTypeAllocSize(EltTy);
+ if (!EltSize)
+ continue;
IRBuilder<> Builder(MI);
@@ -2524,13 +2535,12 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// ignore it if we know that the value isn't captured.
unsigned ArgNo = CS.getArgumentNo(UI);
if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() ||
- CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ if (CS.isByValArgument(ArgNo))
continue;
}
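
The CallSite helpers used above, doesNotCapture() and isByValArgument(), replace the paramHasAttr(ArgNo+1, ...) spelling and hide the off-by-one attribute indexing in which slot 0 is the return value. A small sketch of the read-only-use test under that API:

#include "llvm/Support/CallSite.h"
using namespace llvm;

static bool argIsReadOnlyUse(CallSite CS, unsigned ArgNo) {
  // A byval argument is copied by the caller, so the callee only reads the copy.
  if (CS.isByValArgument(ArgNo))
    return true;
  // Otherwise the call must not write memory and must not capture the pointer.
  return CS.onlyReadsMemory() && CS.doesNotCapture(ArgNo);
}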
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index fbb9465743ce..9c49ec1c84d2 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -256,19 +256,18 @@ struct StrChrOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}
-
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
- std::string Str;
- if (!GetConstantStringInfo(SrcStr, Str))
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
return 0;
- // strchr can find the nul character.
- Str += '\0';
-
- // Compute the offset.
- size_t I = Str.find(CharC->getSExtValue());
- if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ // Compute the offset, make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
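
With getConstantStringInfo returning a StringRef that excludes the terminating NUL, searching for '\0' has to be special-cased: the answer is the string's length rather than a find() hit. A standalone sketch of the offset computation the fold relies on:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <cstddef>

// Returns the offset of C in Str, Str.size() for C == 0 (the terminator),
// or StringRef::npos if C does not occur.
static size_t strchrOffset(llvm::StringRef Str, int64_t C) {
  if (C == 0)
    return Str.size();          // getConstantStringInfo drops the trailing NUL
  return Str.find(static_cast<char>(C));
}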
@@ -296,20 +295,18 @@ struct StrRChrOpt : public LibCallOptimization {
if (!CharC)
return 0;
- std::string Str;
- if (!GetConstantStringInfo(SrcStr, Str)) {
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
return EmitStrChr(SrcStr, '\0', B, TD);
return 0;
}
- // strrchr can find the nul character.
- Str += '\0';
-
// Compute the offset.
- size_t I = Str.rfind(CharC->getSExtValue());
- if (I == std::string::npos) // Didn't find the char. Return null.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
@@ -334,14 +331,13 @@ struct StrCmpOpt : public LibCallOptimization {
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
- std::string Str1, Str2;
- bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
- bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(),
- StringRef(Str1).compare(Str2));
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
@@ -397,14 +393,14 @@ struct StrNCmpOpt : public LibCallOptimization {
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
- std::string Str1, Str2;
- bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
- bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
// strncmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2) {
- StringRef SubStr1 = StringRef(Str1).substr(0, Length);
- StringRef SubStr2 = StringRef(Str2).substr(0, Length);
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
}
@@ -549,9 +545,9 @@ struct StrPBrkOpt : public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0))
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strpbrk(s, "") -> NULL
// strpbrk("", s) -> NULL
@@ -609,9 +605,9 @@ struct StrSpnOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strspn(s, "") -> 0
// strspn("", s) -> 0
@@ -619,8 +615,11 @@ struct StrSpnOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// Constant folding.
- if (HasS1 && HasS2)
- return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
return 0;
}
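
A standalone sketch (illustrative only, not part of this diff; std::string_view stands in for llvm::StringRef) of why find_first_not_of plus the npos fallback reproduces strspn on constant strings:

  #include <cassert>
  #include <cstring>
  #include <string_view>

  // Constant-folded strspn: first position in S1 holding a character that is
  // not in S2, or S1.size() when every character of S1 is drawn from S2.
  static size_t foldStrSpn(std::string_view S1, std::string_view S2) {
    size_t Pos = S1.find_first_not_of(S2);
    return Pos == std::string_view::npos ? S1.size() : Pos;
  }

  int main() {
    assert(foldStrSpn("abcde", "bca") == 3);   // leading "abc" is drawn from "bca"
    assert(foldStrSpn("aaa", "a") == 3);       // whole string matches -> length
    assert(foldStrSpn("xyz", "ab") == 0);      // no leading match -> 0
    assert(foldStrSpn("abcde", "bca") == std::strspn("abcde", "bca"));
  }
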
@@ -638,17 +637,20 @@ struct StrCSpnOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strcspn("", s) -> 0
if (HasS1 && S1.empty())
return Constant::getNullValue(CI->getType());
// Constant folding.
- if (HasS1 && HasS2)
- return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
@@ -692,9 +694,9 @@ struct StrStrOpt : public LibCallOptimization {
}
// See if either input string is a constant string.
- std::string SearchStr, ToFindStr;
- bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
- bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
@@ -704,7 +706,7 @@ struct StrStrOpt : public LibCallOptimization {
if (HasStr1 && HasStr2) {
std::string::size_type Offset = SearchStr.find(ToFindStr);
- if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
@@ -756,11 +758,11 @@ struct MemCmpOpt : public LibCallOptimization {
}
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
- std::string LHSStr, RHSStr;
- if (GetConstantStringInfo(LHS, LHSStr) &&
- GetConstantStringInfo(RHS, RHSStr)) {
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
// Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.length() || Len > RHSStr.length())
+ if (Len > LHSStr.size() || Len > RHSStr.size())
return 0;
uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
return ConstantInt::get(CI->getType(), Ret);
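
A minimal standalone sketch (illustrative only, not part of this diff; std::string_view stands in for llvm::StringRef) of the bounds check above: the constant strings cover only the bytes before the nul, so a memcmp length beyond either one cannot be folded from the available data.

  #include <cassert>
  #include <cstring>
  #include <string_view>

  int main() {
    std::string_view LHS = "abc", RHS = "abd";
    std::size_t Len = 3;
    // Folding is only attempted when Len stays within both constants.
    assert(Len <= LHS.size() && Len <= RHS.size());
    int Ret = std::memcmp(LHS.data(), RHS.data(), Len);
    assert(Ret < 0);  // "abc" < "abd"
  }
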
@@ -841,6 +843,28 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
+// 'cos*' Optimizations
+
+struct CosOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return 0;
+ }
+};
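
A minimal standalone sketch (illustrative only, not part of this diff) of the identity behind the new cos fold: cosine is an even function, so the negation of the argument can be dropped.

  #include <cassert>
  #include <cmath>

  int main() {
    // cos(-x) == cos(x): cosine is even, so the fold preserves the value.
    double x = 1.25;
    assert(std::fabs(std::cos(-x) - std::cos(x)) <= 1e-15);
  }
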
+
+//===---------------------------------------===//
// 'pow*' Optimizations
struct PowOpt : public LibCallOptimization {
@@ -870,7 +894,7 @@ struct PowOpt : public LibCallOptimization {
if (Op2C->isExactlyValue(0.5)) {
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
- // and negative infinite correctly.
+ // and negative infinity correctly.
// TODO: In fast-math mode, this could be just sqrt(x).
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
@@ -963,8 +987,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
// floor((double)floatval) -> (double)floorf(floatval)
Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
- Callee->getAttributes());
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -1000,7 +1023,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1095,8 +1118,8 @@ struct PrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// Check for a fixed format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
return 0;
// Empty format string -> noop.
@@ -1122,11 +1145,9 @@ struct PrintFOpt : public LibCallOptimization {
FormatStr.find('%') == std::string::npos) { // no format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
- FormatStr.erase(FormatStr.end()-1);
- Constant *C = ConstantArray::get(*Context, FormatStr, true);
- C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
- GlobalVariable::InternalLinkage, C, "str");
- EmitPutS(C, B, TD);
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ EmitPutS(GV, B, TD);
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
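
A standalone sketch (illustrative only, not part of this diff; std::string_view in place of llvm::StringRef) of the printf-to-puts rewrite above: the trailing '\n' is dropped from the literal because puts appends a newline itself.

  #include <cassert>
  #include <string_view>

  int main() {
    std::string_view FormatStr = "hello\n";
    // Equivalent of StringRef::drop_back(): strip the final '\n'.
    FormatStr.remove_suffix(1);
    assert(FormatStr == "hello");
    // printf("hello\n") therefore becomes puts("hello"); if the original
    // call's result is used, it is the shortened length plus one.
    assert(FormatStr.size() + 1 == 6);
  }
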
@@ -1184,8 +1205,8 @@ struct SPrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// Check for a fixed format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// If we just have a format string (nothing else crazy) transform it.
@@ -1296,7 +1317,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimization is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
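
A standalone sketch (illustrative only, not part of this diff) of why the fwrite-to-fputc rewrite above is gated on an unused return value: the two calls report different things on success, so they are only interchangeable when the result is ignored.

  #include <cstdio>

  int main() {
    // fwrite reports the number of items written (1 here on success), while
    // fputc reports the character written (or EOF), so the results differ
    // even though both calls emit the same single byte.
    std::size_t Items = std::fwrite("x", 1, 1, stdout); // 1 on success
    int Ch = std::fputc('x', stdout);                   // 'x' (120) on success
    return (Items == 1 && Ch == 'x') ? 0 : 1;
  }
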
@@ -1326,7 +1348,7 @@ struct FPutsOpt : public LibCallOptimization {
if (!Len) return 0;
EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD);
+ CI->getArgOperand(1), B, TD, TLI);
return CI; // Known to have no uses (see above).
}
};
@@ -1338,8 +1360,8 @@ struct FPrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// All the optimizations depend on the format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
@@ -1354,7 +1376,7 @@ struct FPrintFOpt : public LibCallOptimization {
EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getArgOperand(0), B, TD);
+ CI->getArgOperand(0), B, TD, TLI);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1376,7 +1398,7 @@ struct FPrintFOpt : public LibCallOptimization {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
return CI;
}
return 0;
@@ -1422,8 +1444,8 @@ struct PutsOpt : public LibCallOptimization {
return 0;
// Check for a constant string.
- std::string Str;
- if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return 0;
if (Str.empty() && CI->use_empty()) {
@@ -1457,7 +1479,7 @@ namespace {
StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
- PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
@@ -1472,6 +1494,7 @@ namespace {
SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
+ void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1502,6 +1525,11 @@ FunctionPass *llvm::createSimplifyLibCallsPass() {
return new SimplifyLibCalls();
}
+void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
+ if (TLI->has(F))
+ Optimizations[TLI->getName(F)] = Opt;
+}
+
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
@@ -1527,14 +1555,17 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
- if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
+ AddOpt(LibFunc::memcpy, &MemCpy);
Optimizations["memmove"] = &MemMove;
- if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
+ AddOpt(LibFunc::memset, &MemSet);
// _chk variants of String and Memory LibCall Optimizations.
Optimizations["__strcpy_chk"] = &StrCpyChk;
// Math Library Optimizations
+ Optimizations["cosf"] = &Cos;
+ Optimizations["cos"] = &Cos;
+ Optimizations["cosl"] = &Cos;
Optimizations["powf"] = &Pow;
Optimizations["pow"] = &Pow;
Optimizations["powl"] = &Pow;
@@ -1582,8 +1613,8 @@ void SimplifyLibCalls::InitOptimizations() {
// Formatting and IO Optimizations
Optimizations["sprintf"] = &SPrintF;
Optimizations["printf"] = &PrintF;
- Optimizations["fwrite"] = &FWrite;
- Optimizations["fputs"] = &FPuts;
+ AddOpt(LibFunc::fwrite, &FWrite);
+ AddOpt(LibFunc::fputs, &FPuts);
Optimizations["fprintf"] = &FPrintF;
Optimizations["puts"] = &Puts;
}
@@ -2348,9 +2379,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * cbrt(sqrt(x)) -> pow(x,1/6)
// * cbrt(sqrt(x)) -> pow(x,1/9)
//
-// cos, cosf, cosl:
-// * cos(-x) -> cos(x)
-//
// exp, expf, expl:
// * exp(log(x)) -> x
//
@@ -2387,6 +2415,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * stpcpy(str, "literal") ->
// llvm.memcpy(str,"literal",strlen("literal")+1,1)
//
+// strchr:
+// * strchr(p, 0) -> strlen(p)
// tan, tanf, tanl:
// * tan(atan(x)) -> x
//
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c4d2d7..ef65c0a3a907 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb2c843..d83145289ce2 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd562e1..3859a1aec4a7 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -249,7 +249,6 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
if (Term->getSuccessor(i) == Succ)
return i;
}
- return 0;
}
/// SplitEdge - Split the edge connecting specified block. Pass P must
@@ -453,9 +452,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +462,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +475,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +484,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
deleted file mode 100644
index 23a30cc58507..000000000000
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===- BasicInliner.cpp - Basic function level inliner --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "basicinliner"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
-#include "llvm/Transforms/Utils/BasicInliner.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
- cl::desc("Control the amount of basic inlining to perform (default = 200)"));
-
-namespace llvm {
-
- /// BasicInlinerImpl - BasicInliner implemantation class. This hides
- /// container info, used by basic inliner, from public interface.
- struct BasicInlinerImpl {
-
- BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
- void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT
- public:
- BasicInlinerImpl(TargetData *T) : TD(T) {}
-
- /// addFunction - Add function into the list of functions to process.
- /// All functions must be inserted using this interface before invoking
- /// inlineFunctions().
- void addFunction(Function *F) {
- Functions.push_back(F);
- }
-
- /// neverInlineFunction - Sometimes a function is never to be inlined
- /// because of one or other reason.
- void neverInlineFunction(Function *F) {
- NeverInline.insert(F);
- }
-
- /// inlineFuctions - Walk all call sites in all functions supplied by
- /// client. Inline as many call sites as possible. Delete completely
- /// inlined functions.
- void inlineFunctions();
-
- private:
- TargetData *TD;
- std::vector<Function *> Functions;
- SmallPtrSet<const Function *, 16> NeverInline;
- SmallPtrSet<Function *, 8> DeadFunctions;
- InlineCostAnalyzer CA;
- };
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInlinerImpl::inlineFunctions() {
-
- // Scan through and identify all call sites ahead of time so that we only
- // inline call sites in the original functions, not call sites that result
- // from inlining other functions.
- std::vector<CallSite> CallSites;
-
- for (std::vector<Function *>::iterator FI = Functions.begin(),
- FE = Functions.end(); FI != FE; ++FI) {
- Function *F = *FI;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS(cast<Value>(I));
- if (CS && CS.getCalledFunction()
- && !CS.getCalledFunction()->isDeclaration())
- CallSites.push_back(CS);
- }
- }
-
- DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
-
- // Inline call sites.
- bool Changed = false;
- do {
- Changed = false;
- for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
- ++index) {
- CallSite CS = CallSites[index];
- if (Function *Callee = CS.getCalledFunction()) {
-
- // Eliminate calls that are never inlinable.
- if (Callee->isDeclaration() ||
- CS.getInstruction()->getParent()->getParent() == Callee) {
- CallSites.erase(CallSites.begin() + index);
- --index;
- continue;
- }
- InlineCost IC = CA.getInlineCost(CS, NeverInline);
- if (IC.isAlways()) {
- DEBUG(dbgs() << " Inlining: cost=always"
- <<", call: " << *CS.getInstruction());
- } else if (IC.isNever()) {
- DEBUG(dbgs() << " NOT Inlining: cost=never"
- <<", call: " << *CS.getInstruction());
- continue;
- } else {
- int Cost = IC.getValue();
-
- if (Cost >= (int) BasicInlineThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- continue;
- } else {
- DEBUG(dbgs() << " Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- }
- }
-
- // Inline
- InlineFunctionInfo IFI(0, TD);
- if (InlineFunction(CS, IFI)) {
- if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
- Callee->hasAvailableExternallyLinkage()))
- DeadFunctions.insert(Callee);
- Changed = true;
- CallSites.erase(CallSites.begin() + index);
- --index;
- }
- }
- }
- } while (Changed);
-
- // Remove completely inlined functions from module.
- for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
- E = DeadFunctions.end(); I != E; ++I) {
- Function *D = *I;
- Module *M = D->getParent();
- M->getFunctionList().remove(D);
- }
-}
-
-BasicInliner::BasicInliner(TargetData *TD) {
- Impl = new BasicInlinerImpl(TD);
-}
-
-BasicInliner::~BasicInliner() {
- delete Impl;
-}
-
-/// addFunction - Add function into the list of functions to process.
-/// All functions must be inserted using this interface before invoking
-/// inlineFunctions().
-void BasicInliner::addFunction(Function *F) {
- Impl->addFunction(F);
-}
-
-/// neverInlineFunction - Sometimes a function is never to be inlined because
-/// of one or other reason.
-void BasicInliner::neverInlineFunction(Function *F) {
- Impl->neverInlineFunction(F);
-}
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInliner::inlineFunctions() {
- Impl->inlineFunctions();
-}
-
-}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c05291088157..f752d7981c5f 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 4b5f45b31f9b..a80830328d53 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -15,11 +15,15 @@
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
/// 'floor'). This function is known to take a single argument of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
/// added as the suffix of the name; if 'Op' is a float, we add an 'f' suffix.
-Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
- IRBuilder<> &B, const AttrListPtr &Attrs) {
- char NameBuffer[20];
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttrListPtr &Attrs) {
+ SmallString<20> NameBuffer;
if (!Op->getType()->isDoubleTy()) {
// If we need to add a suffix, copy into NameBuffer.
- unsigned NameLen = strlen(Name);
- assert(NameLen < sizeof(NameBuffer)-2);
- memcpy(NameBuffer, Name, NameLen);
+ NameBuffer += Name;
if (Op->getType()->isFloatTy())
- NameBuffer[NameLen] = 'f'; // floorf
+ NameBuffer += 'f'; // floorf
else
- NameBuffer[NameLen] = 'l'; // floorl
- NameBuffer[NameLen+1] = 0;
+ NameBuffer += 'l'; // floorl
Name = NameBuffer;
}
@@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
@@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 7adc5f1ac2eb..7f5cb5e096ae 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,11 +1,11 @@
add_llvm_library(LLVMTransformUtils
AddrModeMatcher.cpp
BasicBlockUtils.cpp
- BasicInliner.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
CloneModule.cpp
+ CmpInstAnalysis.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
InlineFunction.cpp
@@ -14,10 +14,12 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
+ ModuleUtils.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
@@ -27,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index cf21f1ed9703..20052a412277 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -23,8 +23,11 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
#include <map>
@@ -60,7 +63,6 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->getEntryBlock();
@@ -75,7 +77,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+ ValueMapTypeRemapper *TypeMapper) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -113,8 +116,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
- VMap[&BB] = CBB; // Add basic block mapping.
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
@@ -126,7 +144,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
RemapInstruction(II, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -181,7 +200,6 @@ namespace {
const Function *OldFunc;
ValueToValueMapTy &VMap;
bool ModuleLevelChanges;
- SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
@@ -189,24 +207,18 @@ namespace {
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
- SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
/// anything that it can reach.
void CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone);
-
- public:
- /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
- /// mapping its operands through VMap if they are available.
- Constant *ConstantFoldMappedInstruction(const Instruction *I);
};
}
@@ -214,7 +226,7 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- TrackingVH<Value> &BBEntry = VMap[BB];
+ WeakVH &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -224,25 +236,55 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
II != IE; ++II) {
- // If this instruction constant folds, don't bother cloning the instruction,
- // instead, just add the constant to the value map.
- if (Constant *C = ConstantFoldMappedInstruction(II)) {
- VMap[II] = C;
- continue;
+ Instruction *NewInst = II->clone();
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG.
+ if (!isa<PHINode>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ VMap[II] = V;
+ delete NewInst;
+ continue;
+ }
}
- Instruction *NewInst = II->clone();
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
- NewBB->getInstList().push_back(NewInst);
VMap[II] = NewInst; // Add instruction map to value.
-
+ NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
@@ -281,7 +323,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
- BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
@@ -303,38 +346,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
-
- if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
- Returns.push_back(RI);
-}
-
-/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through VMap if they are available.
-Constant *PruningFunctionCloner::
-ConstantFoldMappedInstruction(const Instruction *I) {
- SmallVector<Constant*, 8> Ops;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
- Ops.push_back(Op);
- else
- return 0; // All operands not constant!
-
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(Ops[0], TD);
-
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -361,7 +376,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- Returns, NameSuffix, CodeInfo, TD);
+ NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -386,29 +401,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
-
- // Loop over all of the instructions in the block, fixing up operand
- // references as we go. This uses VMap to do all the hard work.
- //
- BasicBlock::iterator I = NewBB->begin();
-
- DebugLoc TheCallDL;
- if (TheCall)
- TheCallDL = TheCall->getDebugLoc();
-
+
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- // Skip over all PHI nodes, remembering them for later.
- BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
- PHIToResolve.push_back(cast<PHINode>(OldI));
- }
-
- // Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I)
- RemapInstruction(I, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+ if (const PHINode *PN = dyn_cast<PHINode>(I))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -490,31 +495,55 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
++OldI;
}
}
- // NOTE: We cannot eliminate single entry phi nodes here, because of
- // VMap. Single entry phi nodes can have multiple VMap entries
- // pointing at them. Thus, deleting one would require scanning the VMap
- // to update any entries in it that would require that. This would be
- // really slow.
}
-
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+ recursivelySimplifyInstruction(PN, TD);
+
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = Begin;
while (I != NewFunc->end()) {
+ // Check if this block has become dead during inlining or other
+ // simplifications. Note that the first block will appear dead, as it has
+ // not yet been wired up properly.
+ if (I != Begin && (pred_begin(I) == pred_end(I) ||
+ I->getSinglePredecessor() == I)) {
+ BasicBlock *DeadBB = I++;
+ DeleteDeadBlock(DeadBB);
+ continue;
+ }
+
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ ConstantFoldTerminator(I);
+
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
- // Note that we can't eliminate uncond branches if the destination has
- // single-entry PHI nodes. Eliminating the single-entry phi nodes would
- // require scanning the VMap to update any entries that point to the phi
- // node.
BasicBlock *Dest = BI->getSuccessor(0);
- if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ if (!Dest->getSinglePredecessor()) {
++I; continue;
}
-
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+ // above should have zapped all of them.
+ assert(!isa<PHINode>(Dest->begin()));
+
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
@@ -530,4 +559,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Do not increment I, iteratively merge all things this block branches to.
}
+
+ // Make a final pass over the basic blocks from the old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
}
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..9b099150a7af
--- /dev/null
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -0,0 +1,96 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse compare instructions
+// and fold them into constants or other compare instructions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+
+using namespace llvm;
+
+/// getICmpCode - Encode an icmp predicate into a three-bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+ ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
+ : ICI->getPredicate();
+ switch (Pred) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+}
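
A standalone sketch (illustrative only, not part of this diff) of why the three-bit encoding documented above makes predicate combining a matter of bitwise arithmetic: OR-ing and AND-ing the codes of two comparisons on the same operands yields the code of the combined comparison.

  #include <cassert>

  // Bit 0: A > B, bit 1: A == B, bit 2: A < B (the layout documented above).
  enum : unsigned { GT = 1, EQ = 2, LT = 4, NE = GT | LT, GE = GT | EQ, LE = LT | EQ };

  int main() {
    assert((LT | GT) == NE);   // (A < B) | (A > B)   ->  A != B
    assert((LE & GE) == EQ);   // (A <= B) & (A >= B) ->  A == B
    assert((LT & LE) == LT);   // (A < B) & (A <= B)  ->  A < B
  }
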
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or the
+/// predicate for a new ICmp instruction. The sign is passed in to determine
+/// which kind of predicate to use in the new icmp instruction.
+/// A non-null return value will be a true or false constant.
+/// A null return means a new ICmp is needed; the predicate to use for it is
+/// returned in NewICmpPred.
+Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred) {
+ switch (Code) {
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
+ case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
+ case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return NULL;
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 5f47ebb78202..e8c0b80c2126 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -615,9 +615,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
default:
// Otherwise, make the default destination of the switch instruction be one
// of the other successors.
- TheSwitch->setOperand(0, call);
- TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
- TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove redundant case
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8cc26492c292..99b58301634a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -6,21 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file provide the function DemoteRegToStack(). This function takes a
-// virtual register computed by an Instruction and replaces it with a slot in
-// the stack frame, allocated via alloca. It returns the pointer to the
-// AllocaInst inserted. After this function is called on an instruction, we are
-// guaranteed that the only user of the instruction is a store that is
-// immediately after it.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
@@ -28,8 +19,7 @@ using namespace llvm;
/// alloca. This allows the CFG to be changed around without fear of
/// invalidating the SSA information for the value. It returns the pointer to
/// the alloca inserted to create a stack slot for I.
-///
-AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Instruction *AllocaPoint) {
if (I.use_empty()) {
I.eraseFromParent();
@@ -47,21 +37,20 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
F->getEntryBlock().begin());
}
- // Change all of the users of the instruction to read from the stack slot
- // instead.
+ // Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.use_back());
if (PHINode *PN = dyn_cast<PHINode>(U)) {
// If this is a PHI node, we can't insert a load of the value before the
- // use. Instead, insert the load in the predecessor block corresponding
+ // use. Instead insert the load in the predecessor block corresponding
// to the incoming value.
//
// Note that if there are multiple edges from a basic block to this PHI
- // node that we cannot multiple loads. The problem is that the resultant
- // PHI node will have multiple values (from each load) coming in from the
- // same block, which is illegal SSA form. For this reason, we keep track
- // and reuse loads we insert.
- std::map<BasicBlock*, Value*> Loads;
+ // node, we cannot have multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I) {
Value *&V = Loads[PN->getIncomingBlock(i)];
@@ -81,9 +70,9 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
}
- // Insert stores of the computed value into the stack slot. We have to be
- // careful is I is an invoke instruction though, because we can't insert the
- // store AFTER the terminator instruction.
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
if (!isa<TerminatorInst>(I)) {
InsertPt = &I;
@@ -97,18 +86,17 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
InsertPt = II.getNormalDest()->begin();
}
- for (; isa<PHINode>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before any PHI nodes.
- new StoreInst(&I, Slot, InsertPt);
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+ new StoreInst(&I, Slot, InsertPt);
return Slot;
}
-
-/// DemotePHIToStack - This function takes a virtual register computed by a phi
-/// node and replaces it with a slot in the stack frame, allocated via alloca.
-/// The phi node is deleted and it returns the pointer to the alloca inserted.
-AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the pointer to the alloca inserted.
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
P->eraseFromParent();
return 0;
@@ -125,7 +113,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
F->getEntryBlock().begin());
}
- // Iterate over each operand, insert store in each predecessor.
+ // Iterate over each operand inserting a store in each predecessor.
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
assert(II->getParent() != P->getIncomingBlock(i) &&
@@ -135,12 +123,11 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
P->getIncomingBlock(i)->getTerminator());
}
- // Insert load in place of the phi and replace all uses.
+ // Insert a load in place of the PHI and replace all uses.
Value *V = new LoadInst(Slot, P->getName()+".reload", P);
P->replaceAllUsesWith(V);
- // Delete phi.
+ // Delete PHI.
P->eraseFromParent();
-
return Slot;
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5464dbc4a830..d2b167acb0e7 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -10,13 +10,6 @@
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
-// The code in this file for handling inlines through invoke
-// instructions preserves semantics only under some assumptions about
-// the behavior of unwinders which correspond to gcc-style libUnwind
-// exception personality functions. Eventually the IR will be
-// improved to make this unnecessary, but until then, this code is
-// marked [LIBUNWIND].
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
@@ -38,271 +31,52 @@
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
- return InlineFunction(CallSite(CI), IFI);
-}
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
- return InlineFunction(CallSite(II), IFI);
-}
-
-// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is
-// turned on.
-
-/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
-static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
- for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
- EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
- if (exn) return exn;
- }
-
- return 0;
-}
-
-/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
-/// the given llvm.eh.exception call.
-static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
- BasicBlock *exnBlock = exn->getParent();
-
- EHSelectorInst *outOfBlockSelector = 0;
- for (Instruction::use_iterator
- ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
- EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
- if (!sel) continue;
-
- // Immediately accept an eh.selector in the same block as the
- // excepton call.
- if (sel->getParent() == exnBlock) return sel;
-
- // Otherwise, use the first selector we see.
- if (!outOfBlockSelector) outOfBlockSelector = sel;
- }
-
- return outOfBlockSelector;
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, InsertLifetime);
}
-
-/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
-/// in the given landing pad. In principle, llvm.eh.exception is
-/// required to be in the landing pad; in practice, SplitCriticalEdge
-/// can break that invariant, and then inlining can break it further.
-/// There's a real need for a reliable solution here, but until that
-/// happens, we have some fragile workarounds here.
-static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
- // Look for an exception call in the actual landing pad.
- EHExceptionInst *exn = findExceptionInBlock(lpad);
- if (exn) return findSelectorForException(exn);
-
- // Okay, if that failed, look for one in an obvious successor. If
- // we find one, we'll fix the IR by moving things back to the
- // landing pad.
-
- bool dominates = true; // does the lpad dominate the exn call
- BasicBlock *nonDominated = 0; // if not, the first non-dominated block
- BasicBlock *lastDominated = 0; // and the block which branched to it
-
- BasicBlock *exnBlock = lpad;
-
- // We need to protect against lpads that lead into infinite loops.
- SmallPtrSet<BasicBlock*,4> visited;
- visited.insert(exnBlock);
-
- do {
- // We're not going to apply this hack to anything more complicated
- // than a series of unconditional branches, so if the block
- // doesn't terminate in an unconditional branch, just fail. More
- // complicated cases can arise when, say, sinking a call into a
- // split unwind edge and then inlining it; but that can do almost
- // *anything* to the CFG, including leaving the selector
- // completely unreachable. The only way to fix that properly is
- // to (1) prohibit transforms which move the exception or selector
- // values away from the landing pad, e.g. by producing them with
- // instructions that are pinned to an edge like a phi, or
- // producing them with not-really-instructions, and (2) making
- // transforms which split edges deal with that.
- BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
- if (!branch || branch->isConditional()) return 0;
-
- BasicBlock *successor = branch->getSuccessor(0);
-
- // Fail if we found an infinite loop.
- if (!visited.insert(successor)) return 0;
-
- // If the successor isn't dominated by exnBlock:
- if (!successor->getSinglePredecessor()) {
- // We don't want to have to deal with threading the exception
- // through multiple levels of phi, so give up if we've already
- // followed a non-dominating edge.
- if (!dominates) return 0;
-
- // Otherwise, remember this as a non-dominating edge.
- dominates = false;
- nonDominated = successor;
- lastDominated = exnBlock;
- }
-
- exnBlock = successor;
-
- // Can we stop here?
- exn = findExceptionInBlock(exnBlock);
- } while (!exn);
-
- // Look for a selector call for the exception we found.
- EHSelectorInst *selector = findSelectorForException(exn);
- if (!selector) return 0;
-
- // The easy case is when the landing pad still dominates the
- // exception call, in which case we can just move both calls back to
- // the landing pad.
- if (dominates) {
- selector->moveBefore(lpad->getFirstNonPHI());
- exn->moveBefore(selector);
- return selector;
- }
-
- // Otherwise, we have to split at the first non-dominating block.
- // The CFG looks basically like this:
- // lpad:
- // phis_0
- // insnsAndBranches_1
- // br label %nonDominated
- // nonDominated:
- // phis_2
- // insns_3
- // %exn = call i8* @llvm.eh.exception()
- // insnsAndBranches_4
- // %selector = call @llvm.eh.selector(i8* %exn, ...
- // We need to turn this into:
- // lpad:
- // phis_0
- // %exn0 = call i8* @llvm.eh.exception()
- // %selector0 = call @llvm.eh.selector(i8* %exn0, ...
- // insnsAndBranches_1
- // br label %split // from lastDominated
- // nonDominated:
- // phis_2 (without edge from lastDominated)
- // %exn1 = call i8* @llvm.eh.exception()
- // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ...
- // br label %split
- // split:
- // phis_2 (edge from lastDominated, edge from split)
- // %exn = phi ...
- // %selector = phi ...
- // insns_3
- // insnsAndBranches_4
-
- assert(nonDominated);
- assert(lastDominated);
-
- // First, make clones of the intrinsics to go in lpad.
- EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
- EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
- lpadSelector->setArgOperand(0, lpadExn);
- lpadSelector->insertBefore(lpad->getFirstNonPHI());
- lpadExn->insertBefore(lpadSelector);
-
- // Split the non-dominated block.
- BasicBlock *split =
- nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
- nonDominated->getName() + ".lpad-fix");
-
- // Redirect the last dominated branch there.
- cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);
-
- // Move the existing intrinsics to the end of the old block.
- selector->moveBefore(&nonDominated->back());
- exn->moveBefore(selector);
-
- Instruction *splitIP = &split->front();
-
- // For all the phis in nonDominated, make a new phi in split to join
- // that phi with the edge from lastDominated.
- for (BasicBlock::iterator
- i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
- PHINode *phi = dyn_cast<PHINode>(i);
- if (!phi) break;
-
- PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
- splitIP);
- phi->replaceAllUsesWith(splitPhi);
- splitPhi->addIncoming(phi, nonDominated);
- splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
- lastDominated);
- }
-
- // Make new phis for the exception and selector.
- PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
- exn->replaceAllUsesWith(exnPhi);
- selector->setArgOperand(0, exn); // except for this use
- exnPhi->addIncoming(exn, nonDominated);
- exnPhi->addIncoming(lpadExn, lastDominated);
-
- PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
- selector->replaceAllUsesWith(selectorPhi);
- selectorPhi->addIncoming(selector, nonDominated);
- selectorPhi->addIncoming(lpadSelector, lastDominated);
-
- return lpadSelector;
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, InsertLifetime);
}
namespace {
/// A class for recording information about inlining through an invoke.
class InvokeInliningInfo {
- BasicBlock *OuterUnwindDest;
- EHSelectorInst *OuterSelector;
- BasicBlock *InnerUnwindDest;
- PHINode *InnerExceptionPHI;
- PHINode *InnerSelectorPHI;
- SmallVector<Value*, 8> UnwindDestPHIValues;
-
- // FIXME: New EH - These will replace the analogous ones above.
BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind.
BasicBlock *InnerResumeDest; //< Destination for the callee's resume.
LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke.
PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts.
+ SmallVector<Value*, 8> UnwindDestPHIValues;
public:
InvokeInliningInfo(InvokeInst *II)
- : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
- InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0),
- OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
CallerLPad(0), InnerEHValuesPHI(0) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
llvm::BasicBlock *InvokeBB = II->getParent();
- BasicBlock::iterator I = OuterUnwindDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
for (; isa<PHINode>(I); ++I) {
// Save the value to use for this edge.
PHINode *PHI = cast<PHINode>(I);
UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
}
- // FIXME: With the new EH, this if/dyn_cast should be a 'cast'.
- if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) {
- CallerLPad = LPI;
- }
+ CallerLPad = cast<LandingPadInst>(I);
}
- /// The outer unwind destination is the target of unwind edges
- /// introduced for calls within the inlined function.
- BasicBlock *getOuterUnwindDest() const {
- return OuterUnwindDest;
+ /// getOuterResumeDest - The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
}
- EHSelectorInst *getOuterSelector() {
- if (!OuterSelector)
- OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
- return OuterSelector;
- }
-
- BasicBlock *getInnerUnwindDest();
-
- // FIXME: New EH - Rename when new EH is turned on.
- BasicBlock *getInnerUnwindDestNewEH();
+ BasicBlock *getInnerResumeDest();
LandingPadInst *getLandingPadInst() const { return CallerLPad; }
- bool forwardEHResume(CallInst *call, BasicBlock *src);
-
/// forwardResume - Forward the 'resume' instruction to the caller's landing
/// pad block. When the landing pad block has only one predecessor, this is
/// a simple branch. When there is more than one predecessor, we need to
@@ -314,7 +88,7 @@ namespace {
/// destination block for the given basic block, using the values for the
/// original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
- addIncomingPHIValuesForInto(BB, OuterUnwindDest);
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
}
void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
@@ -327,113 +101,8 @@ namespace {
};
}
-/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to
-/// llvm.eh.resume.
-BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
- if (InnerUnwindDest) return InnerUnwindDest;
-
- // Find and hoist the llvm.eh.exception and llvm.eh.selector calls
- // in the outer landing pad to immediately following the phis.
- EHSelectorInst *selector = getOuterSelector();
- if (!selector) return 0;
-
- // The call to llvm.eh.exception *must* be in the landing pad.
- Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
- assert(exn->getParent() == OuterUnwindDest);
-
- // TODO: recognize when we've already done this, so that we don't
- // get a linear number of these when inlining calls into lots of
- // invokes with the same landing pad.
-
- // Do the hoisting.
- Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
- assert(splitPoint != selector && "selector-on-exception dominance broken!");
- if (splitPoint == exn) {
- selector->removeFromParent();
- selector->insertAfter(exn);
- splitPoint = selector->getNextNode();
- } else {
- exn->moveBefore(splitPoint);
- selector->moveBefore(splitPoint);
- }
-
- // Split the landing pad.
- InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
- OuterUnwindDest->getName() + ".body");
-
- // The number of incoming edges we expect to the inner landing pad.
- const unsigned phiCapacity = 2;
-
- // Create corresponding new phis for all the phis in the outer landing pad.
- BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
- BasicBlock::iterator I = OuterUnwindDest->begin();
- for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
- PHINode *outerPhi = cast<PHINode>(I);
- PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
- outerPhi->getName() + ".lpad-body",
- insertPoint);
- outerPhi->replaceAllUsesWith(innerPhi);
- innerPhi->addIncoming(outerPhi, OuterUnwindDest);
- }
-
- // Create a phi for the exception value...
- InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
- "exn.lpad-body", insertPoint);
- exn->replaceAllUsesWith(InnerExceptionPHI);
- selector->setArgOperand(0, exn); // restore this use
- InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);
-
- // ...and the selector.
- InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
- "selector.lpad-body", insertPoint);
- selector->replaceAllUsesWith(InnerSelectorPHI);
- InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);
-
- // All done.
- return InnerUnwindDest;
-}
-
-/// [LIBUNWIND] Try to forward the given call, which logically occurs
-/// at the end of the given block, as a branch to the inner unwind
-/// block. Returns true if the call was forwarded.
-bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
- // First, check whether this is a call to the intrinsic.
- Function *fn = dyn_cast<Function>(call->getCalledValue());
- if (!fn || fn->getName() != "llvm.eh.resume")
- return false;
-
- // At this point, we need to return true on all paths, because
- // otherwise we'll construct an invoke of the intrinsic, which is
- // not well-formed.
-
- // Try to find or make an inner unwind dest, which will fail if we
- // can't find a selector call for the outer unwind dest.
- BasicBlock *dest = getInnerUnwindDest();
- bool hasSelector = (dest != 0);
-
- // If we failed, just use the outer unwind dest, dropping the
- // exception and selector on the floor.
- if (!hasSelector)
- dest = OuterUnwindDest;
-
- // Make a branch.
- BranchInst::Create(dest, src);
-
- // Update the phis in the destination. They were inserted in an
- // order which makes this work.
- addIncomingPHIValuesForInto(src, dest);
-
- if (hasSelector) {
- InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
- InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
- }
-
- return true;
-}
-
-/// Get or create a target for the branch from ResumeInsts.
-BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
- // FIXME: New EH - rename this function when new EH is turned on.
+/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
// Split the landing pad.
@@ -472,7 +141,7 @@ BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
- BasicBlock *Dest = getInnerUnwindDestNewEH();
+ BasicBlock *Dest = getInnerResumeDest();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -485,14 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
RI->eraseFromParent();
}
-/// [LIBUNWIND] Check whether this selector is "only cleanups":
-/// call i32 @llvm.eh.selector(blah, blah, i32 0)
-static bool isCleanupOnlySelector(EHSelectorInst *selector) {
- if (selector->getNumArgOperands() != 3) return false;
- ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
- return (val && val->isZero());
-}
-
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
/// an invoke, we have to turn all of the calls that can throw into
/// invokes. This function analyzes BB to see if there are any calls, and if so,
@@ -507,77 +168,34 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
- if (LPI) // FIXME: New EH - This won't be NULL in the new EH.
- if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
- unsigned NumClauses = LPI->getNumClauses();
- L->reserveClauses(NumClauses);
- for (unsigned i = 0; i != NumClauses; ++i)
- L->addClause(LPI->getClause(i));
- }
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- if (CI == 0) continue;
-
- // LIBUNWIND: merge selector instructions.
- if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
- EHSelectorInst *Outer = Invoke.getOuterSelector();
- if (!Outer) continue;
-
- bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
- bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);
-
- // If both selectors contain only cleanups, we don't need to do
- // anything. TODO: this is really just a very specific instance
- // of a much more general optimization.
- if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;
-
- // Otherwise, we just append the outer selector to the inner selector.
- SmallVector<Value*, 16> NewSelector;
- for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
- NewSelector.push_back(Inner->getArgOperand(i));
- for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
- NewSelector.push_back(Outer->getArgOperand(i));
-
- CallInst *NewInner =
- IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
- // No need to copy attributes, calling convention, etc.
- NewInner->takeName(Inner);
- Inner->replaceAllUsesWith(NewInner);
- Inner->eraseFromParent();
- continue;
- }
-
+
// If this call cannot unwind, don't convert it to an invoke.
- if (CI->doesNotThrow())
+ if (!CI || CI->doesNotThrow())
continue;
-
- // Convert this function call into an invoke instruction.
- // First, split the basic block.
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
// Delete the unconditional branch inserted by splitBasicBlock
BB->getInstList().pop_back();
- // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
- // directly to the new landing pad.
- if (Invoke.forwardEHResume(CI, BB)) {
- // TODO: 'Split' is now unreachable; clean it up.
-
- // We want to leave the original call intact so that the call
- // graph and other structures won't get misled. We also have to
- // avoid processing the next block, or we'll iterate here forever.
- return true;
- }
-
- // Otherwise, create the new invoke instruction.
+ // Create the new invoke instruction.
ImmutableCallSite CS(CI);
SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), Split,
- Invoke.getOuterUnwindDest(),
- InvokeArgs, CI->getName(), BB);
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterResumeDest(),
+ InvokeArgs, CI->getName(), BB);
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -585,21 +203,20 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// updates the CallGraph if present, because it uses a WeakVH.
CI->replaceAllUsesWith(II);
- Split->getInstList().pop_front(); // Delete the original call
+ // Delete the original call
+ Split->getInstList().pop_front();
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
+ // Update any PHI nodes in the exceptional block to indicate that there is
+ // now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
return false;
}
return false;
}
-
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
-/// in the body of the inlined function into invokes and turn unwind
-/// instructions into branches to the invoke unwind dest.
+/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
@@ -614,7 +231,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// start of the inlined code to its end, checking for stuff we need to
// rewrite. If the code doesn't have calls or unwinds, we know there is
// nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ if (!InlinedCodeInfo.ContainsCalls) {
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
// invoke instruction. Eliminate these entries (which might even delete the
@@ -628,30 +245,13 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
- // Honor a request to skip the next block. We don't need to
- // consider UnwindInsts in this case either.
+ // Honor a request to skip the next block.
++BB;
continue;
}
- if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // An UnwindInst requires special handling when it gets inlined into an
- // invoke site. Once this happens, we know that the unwind would cause
- // a control transfer to the invoke exception destination, so we can
- // transform it into a direct branch to the exception destination.
- BranchInst::Create(InvokeDest, UI);
-
- // Delete the unwind instruction!
- UI->eraseFromParent();
-
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
- Invoke.addIncomingPHIValuesFor(BB);
- }
-
- if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
Invoke.forwardResume(RI);
- }
}
// Now that everything is happy, we have one final detail. The PHI nodes in
@@ -836,8 +436,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
-/// update InlinedAtEntry of a DebugLoc.
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
+/// recursively update InlinedAtEntry of a DebugLoc.
static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
const DebugLoc &InlinedAtDL,
LLVMContext &Ctx) {
@@ -847,16 +447,15 @@ static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
NewInlinedAtDL.getAsMDNode(Ctx));
}
-
+
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
InlinedAtDL.getAsMDNode(Ctx));
}
-
/// fixupLineNumbers - Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
- Instruction *TheCall) {
+ Instruction *TheCall) {
DebugLoc TheCallDL = TheCall->getDebugLoc();
if (TheCallDL.isUnknown())
return;
@@ -878,18 +477,18 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
-// InlineFunction - This function inlines the called function into the basic
-// block of the caller. This returns false if it is not possible to inline this
-// call. The program is still in a well defined state if this occurs though.
-//
-// Note that this only does one level of inlining. For example, if the
-// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-// exists in the instruction stream. Similarly this will inline a recursive
-// function by one level.
-//
-bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+/// InlineFunction - This function inlines the called function into the basic
+/// block of the caller. This returns false if it is not possible to inline
+/// this call. The program is still in a well defined state if this occurs
+/// though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
- LLVMContext &Context = TheCall->getContext();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
@@ -924,43 +523,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ const LandingPadInst *LP = BB->getLandingPadInst();
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality) {
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
- //
Function::iterator LastBlock = &Caller->back();
// Make sure to capture all of the return instructions from the cloned
@@ -987,7 +583,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
@@ -1023,7 +619,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before. We insert the
// instructions at the end of the current alloca list.
- //
{
BasicBlock::iterator InsertPoint = Caller->begin()->begin();
for (BasicBlock::iterator I = FirstNewBlock->begin(),
@@ -1063,7 +658,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
- if (!IFI.StaticAllocas.empty()) {
+ if (InsertLifetime && !IFI.StaticAllocas.empty()) {
IRBuilder<> builder(FirstNewBlock->begin());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -1098,20 +693,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
}
-
- // Count the number of StackRestore calls we insert.
- unsigned NumStackRestores = Returns.size();
-
- // If we are inlining an invoke instruction, insert restores before each
- // unwind. These unwinds will be rewritten into branches later.
- if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB)
- if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
- ++NumStackRestores;
- }
- }
}
// If we are inlining a tail call instruction through a call site that isn't
@@ -1131,21 +712,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
}
}
- // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
- // instructions are unreachable.
- if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB) {
- TerminatorInst *Term = BB->getTerminator();
- if (isa<UnwindInst>(Term)) {
- new UnreachableInst(Context, Term);
- BB->getInstList().erase(Term);
- }
- }
-
// If we are inlining for an invoke instruction, we must make sure to rewrite
- // any inlined 'unwind' instructions into branches to the invoke exception
- // destination, and call instructions into invoke instructions.
+ // any call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
@@ -1308,11 +876,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// If we inserted a phi node, check to see if it has a single value (e.g. all
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
- if (PHI)
+ if (PHI) {
if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
+ }
return true;
}
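The hunks above extend the public InlineFunction entry points with an InsertLifetime flag. Below is a minimal caller sketch against that updated signature; the enclosing pass context, the Call variable, and the choice to pass true are illustrative assumptions and not part of this patch:
  // Hypothetical call site inside an inlining pass. Only InlineFunction,
  // InlineFunctionInfo and CallSite come from the API shown above.
  InlineFunctionInfo IFI(/*CallGraph=*/0, /*TargetData=*/0);
  if (InlineFunction(CallSite(Call), IFI, /*InsertLifetime=*/true)) {
    // The callee body has been spliced into the caller; its static allocas
    // are now recorded in IFI.StaticAllocas for later processing.
  }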
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..88b2ffee64da
--- /dev/null
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/Utils/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TransformUtils
+parent = Transforms
+required_libraries = Analysis Core IPA Support Target
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 7034feb227ad..d1c4d5968231 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -105,33 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *TheOnlyDest = SI->getDefaultDest();
BasicBlock *DefaultDest = TheOnlyDest;
- assert(TheOnlyDest == SI->getDefaultDest() &&
- "Default destination is not successor #0?");
// Figure out which case it goes to.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
// Found case matching a constant operand?
- if (SI->getSuccessorValue(i) == CI) {
- TheOnlyDest = SI->getSuccessor(i);
+ if (i.getCaseValue() == CI) {
+ TheOnlyDest = i.getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (SI->getSuccessor(i) == DefaultDest) {
+ if (i.getCaseSuccessor() == DefaultDest) {
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
- --i; --e; // Don't skip an entry...
+ --i; --e;
continue;
}
// Otherwise, check to see if the switch only branches to one destination.
// We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
}
if (CI && !TheOnlyDest) {
@@ -165,14 +165,16 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
return true;
}
- if (SI->getNumSuccessors() == 2) {
+ if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
+ SwitchInst::CaseIt FirstCase = SI->case_begin();
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- SI->getSuccessorValue(1), "cond");
+ FirstCase.getCaseValue(), "cond");
// Insert the new branch.
- Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0));
+ Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
// Delete the old switch.
SI->eraseFromParent();
@@ -257,6 +259,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
II->getIntrinsicID() == Intrinsic::lifetime_end)
return isa<UndefValue>(II->getArgOperand(1));
}
+
+ if (extractMallocCall(I)) return true;
+
+ if (CallInst *CI = isFreeCall(I))
+ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
+ return C->isNullValue() || isa<UndefValue>(C);
+
return false;
}
@@ -346,22 +355,27 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
/// instructions in other blocks as well in this block.
bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
bool MadeChange = false;
- for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+
+#ifndef NDEBUG
+ // In debug builds, ensure that the terminator of the block is never replaced
+ // or deleted by these simplifications. The idea of simplification is that it
+ // cannot introduce new instructions, and there is no way to replace the
+ // terminator of a block without introducing a new instruction.
+ AssertingVH<Instruction> TerminatorVH(--BB->end());
+#endif
+
+ for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+ assert(!BI->isTerminator());
Instruction *Inst = BI++;
-
- if (Value *V = SimplifyInstruction(Inst, TD)) {
- WeakVH BIHandle(BI);
- ReplaceAndSimplifyAllUses(Inst, V, TD);
+
+ WeakVH BIHandle(BI);
+ if (recursivelySimplifyInstruction(Inst, TD)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
continue;
}
- if (Inst->isTerminator())
- break;
-
- WeakVH BIHandle(BI);
MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
if (BIHandle != BI)
BI = BB->begin();
@@ -399,17 +413,11 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+ Value *OldPhiIt = PhiIt;
- Value *PNV = SimplifyInstruction(PN, TD);
- if (PNV == 0) continue;
+ if (!recursivelySimplifyInstruction(PN, TD))
+ continue;
- // If we're able to simplify the phi to a single value, substitute the new
- // value into all of its uses.
- assert(PNV != PN && "SimplifyInstruction broken!");
-
- Value *OldPhiIt = PhiIt;
- ReplaceAndSimplifyAllUses(PN, PNV, TD);
-
// If recursive simplification ended up deleting the next PHI node we would
// iterate to, then our iterator is invalid, restart scanning from the top
// of the block.
@@ -486,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -512,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -740,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
@@ -764,9 +762,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8dcbf8..0bc185d8b722 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,16 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -537,16 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
// handles the case when a PHI node has multiple instances of itself as
// arguments.
SmallVector<BasicBlock*, 8> OuterLoopPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) != PN ||
!L->contains(PN->getIncomingBlock(i))) {
// We can't split indirectbr edges.
if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
return 0;
-
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
-
+ }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
// If ScalarEvolution is around and knows anything about values in
@@ -556,9 +571,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +654,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa295378..e15497a77ae3 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -148,6 +149,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return false;
+ }
+
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
@@ -165,12 +172,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -181,6 +182,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (TripCount != 0 && Count > TripCount)
Count = TripCount;
+ // Don't enter the unroll code if there is nothing to do. This way we don't
+ // need to support "partial unrolling by 1".
+ if (TripCount == 0 && Count < 2)
+ return false;
+
assert(Count > 0);
assert(TripMultiple > 0);
assert(TripCount == 0 || TripCount % TripMultiple == 0);
@@ -188,6 +194,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +229,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +354,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
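The LoopUnroll.cpp changes above thread a new AllowRuntime flag through UnrollLoop. A minimal sketch of how a driver pass might call the updated signature; L, Count, TripCount, TripMultiple, LI and LPM are assumed to be supplied by the surrounding pass and are not defined by this patch:
  // Hypothetical driver; the parameter order matches the declaration changed above.
  bool Unrolled = UnrollLoop(L, Count, TripCount, /*AllowRuntime=*/true,
                             TripMultiple, LI, &LPM);
  if (!Unrolled)
    return false; // e.g. no preheader, unclonable body, or nothing to do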
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 000000000000..3aa6befe1f15
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,372 @@
+//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generating a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI name is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the original loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ if (!L->getExitingBlock())
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more loops
+ // to be unrolled than would be possible by relying on induction variable
+ // simplification alone.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count, and the trip count needs
+ // to be an integer value (allowing a pointer type is a TODO item).
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
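+ // (For a power of two, Count & (Count - 1) == 0; e.g. 8 & 7 == 0.)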
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to split the original preheader twice: once for the end of the
+ // prolog code and once for a new loop preheader.
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ //  extra iterations = run-time trip count % loop unroll factor
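+ //  (e.g. a run-time trip count of 10 with an unroll factor of 4 leaves
+ //   2 prolog iterations)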
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // If there are no extra iterations, jump straight to the unrolled loop.
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but don't clone the loop
+ // structure itself. This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into the function just before the original loop.
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-thru case of the last
+ // left-over iteration.
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // Compare the extra-iteration count with this left-over value and branch.
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instructions' operands to use the values
+ // created when the blocks were cloned.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 61ab3f65330a..c70ced18e45d 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -1,3 +1,16 @@
+//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the 'expect' intrinsic to LLVM metadata.
+//
+//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "lower-expect-intrinsic"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -60,14 +73,17 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
LLVMContext &Context = CI->getContext();
Type *Int32Ty = Type::getInt32Ty(Context);
- unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
std::vector<Value *> Vec;
unsigned n = SI->getNumCases();
- Vec.resize(n + 1); // +1 for MDString
+ Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case
Vec[0] = MDString::get(Context, "branch_weights");
+ Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ?
+ LikelyBranchWeight : UnlikelyBranchWeight);
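+ // Vec[0] holds the "branch_weights" string, Vec[1] the weight of the
+ // default case, and the remaining entries the weights of the explicit cases.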
for (unsigned i = 0; i < n; ++i) {
- Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+ Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ?
+ LikelyBranchWeight : UnlikelyBranchWeight);
}
MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index c96c8fce7b19..930555424ded 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -54,7 +54,6 @@
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
@@ -193,20 +192,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
BB->getInstList().erase(II);
++NumInvokes; Changed = true;
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // Insert a call to abort()
- CallInst::Create(AbortFn, "", UI)->setTailCall();
-
- // Insert a return instruction. This really should be a "barrier", as it
- // is unreachable.
- ReturnInst::Create(F.getContext(),
- F.getReturnType()->isVoidTy() ?
- 0 : Constant::getNullValue(F.getReturnType()), UI);
-
- // Remove the unwind instruction now.
- BB->getInstList().erase(UI);
-
- ++NumUnwinds; Changed = true;
}
return Changed;
}
@@ -404,7 +389,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
SmallVector<ReturnInst*,16> Returns;
- SmallVector<UnwindInst*,16> Unwinds;
SmallVector<InvokeInst*,16> Invokes;
UnreachableInst* UnreachablePlaceholder = 0;
@@ -415,14 +399,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Returns.push_back(RI);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Invokes.push_back(II);
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- Unwinds.push_back(UI);
}
- if (Unwinds.empty() && Invokes.empty()) return false;
+ if (Invokes.empty()) return false;
NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
// TODO: This is not an optimal way to do this. In particular, this always
// inserts setjmp calls into the entries of functions with invoke instructions
@@ -572,13 +553,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
CallInst::Create(AbortFn, "",
TermBlock->getTerminator())->setTailCall();
-
- // Replace all unwinds with a branch to the unwind handler.
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(UnwindHandler, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
// Replace the inserted unreachable with a branch to the unwind handler.
if (UnreachablePlaceholder) {
BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 686178ca01cc..a16130d3d74a 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
// Start with "simple" cases
- for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
- Cases.push_back(CaseRange(SI->getSuccessorValue(i),
- SI->getSuccessorValue(i),
- SI->getSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
+
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
@@ -281,7 +281,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock* Default = SI->getDefaultDest();
// If there is only the default destination, don't bother with the code below.
- if (SI->getNumCases() == 1) {
+ if (!SI->getNumCases()) {
BranchInst::Create(SI->getDefaultDest(), CurBlock);
CurBlock->getInstList().erase(SI);
return;
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 000000000000..8491c5582d97
--- /dev/null
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,64 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
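+ // Entries of llvm.global_ctors / llvm.global_dtors are { i32 priority,
+ // void ()* fn } structs kept in an array with appending linkage.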
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+ StructType *Ty = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
+
+ Constant *RuntimeCtorInit = ConstantStruct::get(
+ Ty, IRB.getInt32(Priority), F, NULL);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ }
+ GVCtor->eraseFromParent();
+ }
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
+ CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e94251350..2357d81916a4 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -66,7 +67,8 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
}
static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
- return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(Val));
}
static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
return LHS == RHS;
@@ -423,7 +425,8 @@ void PromoteMem2Reg::run() {
// Finally, after the scan, check to see if the store is all that is left.
if (Info.UsingBlocks.empty()) {
- // Record debuginfo for the store and remove the declaration's debuginfo.
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
if (!DIB)
DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
@@ -590,7 +593,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fa8061c2b44c..e60a41b786a7 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
User->eraseFromParent();
}
}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts)
+ const {
+ return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4c6077..66dd2c954e29 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,16 +14,20 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Operator.h"
#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -63,9 +67,8 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
IRBuilder<> &Builder);
- bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
- bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool SimplifyIndirectBr(IndirectBrInst *IBI);
@@ -205,6 +208,42 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
return BI->getCondition();
}
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
+/// given instruction, which is assumed to be safe to speculate. 1 means
+/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
+static unsigned ComputeSpeculationCost(const User *I) {
+ assert(isSafeToSpeculativelyExecute(I) &&
+ "Instruction is not safe to speculatively execute!");
+ switch (Operator::getOpcode(I)) {
+ default:
+ // In doubt, be conservative.
+ return UINT_MAX;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return UINT_MAX;
+ return 1;
+ case Instruction::Load:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return 1; // These are all cheap.
+
+ case Instruction::Call:
+ case Instruction::Select:
+ return 2;
+ }
+}
+
/// DominatesMergePoint - If we have a merge point of an "if condition" as
/// accepted above, return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -257,46 +296,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = 0;
-
- switch (I->getOpcode()) {
- default: return false; // Cannot hoist this out safely.
- case Instruction::Load:
- // We have to check to make sure there are no instructions before the
- // load in its basic block, as we are going to hoist the load out to its
- // predecessor.
- if (PBB->getFirstNonPHIOrDbg() != I)
- return false;
- Cost = 1;
- break;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
- return false;
- Cost = 1;
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- Cost = 1;
- break; // These are all cheap and non-trapping instructions.
-
- case Instruction::Select:
- Cost = 2;
- break;
- }
+ unsigned Cost = ComputeSpeculationCost(I);
if (Cost > CostRemaining)
return false;
@@ -373,9 +376,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Span = Span.inverse();
// If there are a ton of values, we don't want to make a ginormous switch.
- if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
- // We don't handle wrapped sets yet.
- Span.isWrappedSet())
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet())
return 0;
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
@@ -430,9 +431,9 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
return 0;
}
-
+
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
- Instruction* Cond = 0;
+ Instruction *Cond = 0;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -479,8 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
BasicBlock*> > &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(std::make_pair(i.getCaseValue(),
+ i.getCaseSuccessor()));
return SI->getDefaultDest();
}
@@ -603,11 +605,13 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- for (unsigned i = SI->getNumCases()-1; i != 0; --i)
- if (DeadCases.count(SI->getCaseValue(i))) {
- SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
+ --i;
+ if (DeadCases.count(i.getCaseValue())) {
+ i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
+ }
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -951,6 +955,20 @@ HoistTerminator:
/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
/// the BB1 into the branch block to speculatively execute it.
+///
+/// Turn
+/// BB:
+/// %t1 = icmp
+/// br i1 %t1, label %BB1, label %BB2
+/// BB1:
+/// %t3 = add %t2, c
+/// br label BB2
+/// BB2:
+/// =>
+/// BB:
+/// %t1 = icmp
+/// %t4 = add %t2, c
+/// %t3 = select i1 %t1, %t4, %t2
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// Only speculatively execution a single instruction (not counting the
// terminator) for now.
@@ -967,8 +985,29 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
return false;
HInst = I;
}
- if (!HInst)
- return false;
+
+ BasicBlock *BIParent = BI->getParent();
+
+ // Check the instruction to be hoisted, if there is one.
+ if (HInst) {
+ // Don't hoist the instruction if it's unsafe or expensive.
+ if (!isSafeToSpeculativelyExecute(HInst))
+ return false;
+ if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+ return false;
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->mayHaveSideEffects() &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+ }
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
@@ -983,130 +1022,78 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
Invert = true;
}
- // Turn
- // BB:
- // %t1 = icmp
- // br i1 %t1, label %BB1, label %BB2
- // BB1:
- // %t3 = add %t2, c
- // br label BB2
- // BB2:
- // =>
- // BB:
- // %t1 = icmp
- // %t4 = add %t2, c
- // %t3 = select i1 %t1, %t2, %t3
- switch (HInst->getOpcode()) {
- default: return false; // Not safe / profitable to hoist.
- case Instruction::Add:
- case Instruction::Sub:
- // Not worth doing for vector ops.
- if (HInst->getType()->isVectorTy())
- return false;
- break;
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- // Don't mess with vector operations.
- if (HInst->getType()->isVectorTy())
- return false;
- break; // These are all cheap and non-trapping instructions.
- }
-
- // If the instruction is obviously dead, don't try to predicate it.
- if (HInst->use_empty()) {
- HInst->eraseFromParent();
- return true;
+ // Collect interesting PHIs, and scan for hazards.
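+ // Each pair records the value incoming from BB1 and the value incoming
+ // from BIParent for a PHI node in the common successor BB2.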
+ SmallSetVector<std::pair<Value *, Value *>, 4> PHIs;
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (BasicBlock::iterator I = BB2->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BIParentV = PN->getIncomingValueForBlock(BIParent);
+
+ // Skip PHIs which are trivial.
+ if (BB1V == BIParentV)
+ continue;
+
+ // Check for safety.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
+ // An unfolded ConstantExpr could end up getting expanded into
+ // Instructions. Don't speculate this and another instruction at
+ // the same time.
+ if (HInst)
+ return false;
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+ if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ return false;
+ }
+
+ // Ok, we may insert a select for this PHI.
+ PHIs.insert(std::make_pair(BB1V, BIParentV));
}
- // Can we speculatively execute the instruction? And what is the value
- // if the condition is false? Consider the phi uses, if the incoming value
- // from the "if" block are all the same V, then V is the value of the
- // select if the condition is false.
- BasicBlock *BIParent = BI->getParent();
- SmallVector<PHINode*, 4> PHIUses;
- Value *FalseV = NULL;
+ // If there are no PHIs to process, bail early. This helps ensure idempotence
+ // as well.
+ if (PHIs.empty())
+ return false;
- BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
- for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
- UI != E; ++UI) {
- // Ignore any user that is not a PHI node in BB2. These can only occur in
- // unreachable blocks, because they would not be dominated by the instr.
- PHINode *PN = dyn_cast<PHINode>(*UI);
- if (!PN || PN->getParent() != BB2)
- return false;
- PHIUses.push_back(PN);
-
- Value *PHIV = PN->getIncomingValueForBlock(BIParent);
- if (!FalseV)
- FalseV = PHIV;
- else if (FalseV != PHIV)
- return false; // Inconsistent value when condition is false.
- }
-
- assert(FalseV && "Must have at least one user, and it must be a PHI");
-
- // Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
- // being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
- i != e; ++i) {
- Instruction *OpI = dyn_cast<Instruction>(*i);
- if (OpI && OpI->getParent() == BIParent &&
- !OpI->isUsedInBasicBlock(BIParent))
- return false;
- }
+ // If we get here, we can hoist the instruction and if-convert.
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
- // If we get here, we can hoist the instruction. Try to place it
- // before the icmp instruction preceding the conditional branch.
- BasicBlock::iterator InsertPos = BI;
- if (InsertPos != BIParent->begin())
- --InsertPos;
- // Skip debug info between condition and branch.
- while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
- --InsertPos;
- if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
- SmallPtrSet<Instruction *, 4> BB1Insns;
- for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
- BB1I != BB1E; ++BB1I)
- BB1Insns.insert(BB1I);
- for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
- UI != UE; ++UI) {
- Instruction *Use = cast<Instruction>(*UI);
- if (!BB1Insns.count(Use)) continue;
-
- // If BrCond uses the instruction that place it just before
- // branch instruction.
- InsertPos = BI;
- break;
- }
- } else
- InsertPos = BI;
- BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+ // Hoist the instruction.
+ if (HInst)
+ BIParent->getInstList().splice(BI, BB1->getInstList(), HInst);
- // Create a select whose true value is the speculatively executed value and
- // false value is the previously determined FalseV.
+ // Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
- SelectInst *SI;
- if (Invert)
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, FalseV, HInst,
- FalseV->getName() + "." + HInst->getName()));
- else
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, HInst, FalseV,
- HInst->getName() + "." + FalseV->getName()));
-
- // Make the PHI node use the select for all incoming values for "then" and
- // "if" blocks.
- for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
- PHINode *PN = PHIUses[i];
- for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
- if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
- PN->setIncomingValue(j, SI);
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ Value *TrueV = PHIs[i].first;
+ Value *FalseV = PHIs[i].second;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ SelectInst *SI;
+ if (Invert)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, FalseV, TrueV,
+ FalseV->getName() + "." + TrueV->getName()));
+ else
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName()));
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (BasicBlock::iterator I = BB2->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned BB1I = PN->getBasicBlockIndex(BB1);
+ unsigned BIParentI = PN->getBasicBlockIndex(BIParent);
+ Value *BB1V = PN->getIncomingValue(BB1I);
+ Value *BIParentV = PN->getIncomingValue(BIParentI);
+ if (TrueV == BB1V && FalseV == BIParentV) {
+ PN->setIncomingValue(BB1I, SI);
+ PN->setIncomingValue(BIParentI, SI);
+ }
+ }
}
++NumSpeculations;
@@ -1461,6 +1448,49 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
return true;
}
+/// ExtractBranchMetadata - Given a conditional BranchInst, retrieve the
+/// probabilities of the branch taking each edge. Fills in the two APInt
+/// parameters and returns true, or returns false if no or invalid metadata
+/// was found.
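+/// The metadata is expected to hold the "branch_weights" string followed by
+/// the 32-bit weights of the true and false edges, in that order.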
+static bool ExtractBranchMetadata(BranchInst *BI,
+ APInt &ProbTrue, APInt &ProbFalse) {
+ assert(BI->isConditional() &&
+ "Looking for probabilities on unconditional branch?");
+ MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
+ ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
+ ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
+ if (!CITrue || !CIFalse) return false;
+ ProbTrue = CITrue->getValue();
+ ProbFalse = CIFalse->getValue();
+ assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
+ "Branch probability metadata must be 32-bit integers");
+ return true;
+}
+
+/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
+/// the event of overflow, logically-shifts all four inputs right until the
+/// multiply fits.
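+/// Shifting C and D as well keeps all four values on a consistent scale for
+/// the follow-up multiplies performed by the caller.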
+static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
+ unsigned &BitsLost) {
+ BitsLost = 0;
+ bool Overflow = false;
+ APInt Result = A.umul_ov(B, Overflow);
+ if (Overflow) {
+ APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
+ do {
+ B = B.lshr(1);
+ ++BitsLost;
+ } while (B.ugt(MaxB));
+ A = A.lshr(BitsLost);
+ C = C.lshr(BitsLost);
+ D = D.lshr(BitsLost);
+ Result = A * B;
+ }
+ return Result;
+}
+
+
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
@@ -1479,7 +1509,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
-
+
// Allow a single instruction to be hoisted in addition to the compare
// that feeds the branch. We later ensure that any values that _it_ uses
// were also live in the predecessor, so that we don't unnecessarily create
@@ -1487,7 +1517,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
@@ -1557,7 +1587,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
SmallPtrSet<Value*, 4> UsedValues;
for (Instruction::op_iterator OI = BonusInst->op_begin(),
OE = BonusInst->op_end(); OI != OE; ++OI) {
- Value* V = *OI;
+ Value *V = *OI;
if (!isa<Constant>(V))
UsedValues.insert(V);
}
@@ -1602,10 +1632,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
PBI->setCondition(NewCond);
- BasicBlock *OldTrue = PBI->getSuccessor(0);
- BasicBlock *OldFalse = PBI->getSuccessor(1);
- PBI->setSuccessor(0, OldFalse);
- PBI->setSuccessor(1, OldTrue);
+ PBI->swapSuccessors();
}
// If we have a bonus inst, clone it into the predecessor block.
@@ -1638,6 +1665,94 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PBI->setSuccessor(1, FalseDest);
}
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+
+ // Merge probability data into PredBlock's branch.
+ APInt A, B, C, D;
+ if (ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) {
+ // Given IR which does:
+ // bbA:
+ // br i1 %x, label %bbB, label %bbC
+ // bbB:
+ // br i1 %y, label %bbD, label %bbC
+ // Let's call the probability that we take the edge from %bbA to %bbB
+ // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to
+ // %bbC probability 'd'.
+ //
+ // We transform the IR into:
+ // bbA:
+ // br i1 %z, label %bbD, label %bbC
+ // where the probability of going to %bbD is (a*c) and going to bbC is
+ // (b+a*d).
+ //
+ // Probabilities aren't stored as ratios directly. Using branch weights,
+ // we get:
+ // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D.
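+ // For example, if a = b = 1/2 (weights 1:1) and c = 3/4, d = 1/4
+ // (weights 3:1), the merged weights are 1*3 = 3 for %bbD and
+ // 1*1 + 1*3 + 1*1 = 5 for %bbC, matching probabilities 3/8 and 5/8.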
+
+ // In the event of overflow, we want to drop the LSB of the input
+ // probabilities.
+ unsigned BitsLost;
+
+ // Ignore overflow result on ProbTrue.
+ APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost);
+
+ APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ }
+
+ APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ Tmp1 = Tmp1.lshr(BitsLost*2);
+ }
+
+ APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ Tmp1 = Tmp1.lshr(BitsLost*2);
+ Tmp2 = Tmp2.lshr(BitsLost*2);
+ }
+
+ bool Overflow1 = false, Overflow2 = false;
+ APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1);
+ APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2);
+
+ if (Overflow1 || Overflow2) {
+ ProbTrue = ProbTrue.lshr(1);
+ Tmp1 = Tmp1.lshr(1);
+ Tmp2 = Tmp2.lshr(1);
+ Tmp3 = Tmp3.lshr(1);
+ Tmp4 = Tmp2 + Tmp3;
+ ProbFalse = Tmp4 + Tmp1;
+ }
+
+ // The sum of branch weights must fit in 32-bits.
+ if (ProbTrue.isNegative() && ProbFalse.isNegative()) {
+ ProbTrue = ProbTrue.lshr(1);
+ ProbFalse = ProbFalse.lshr(1);
+ }
+
+ if (ProbTrue != ProbFalse) {
+ // Normalize the result.
+ APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse);
+ ProbTrue = ProbTrue.udiv(GCD);
+ ProbFalse = ProbFalse.udiv(GCD);
+
+ LLVMContext &Context = BI->getContext();
+ Value *Ops[3];
+ Ops[0] = BI->getMetadata(LLVMContext::MD_prof)->getOperand(0);
+ Ops[1] = ConstantInt::get(Context, ProbTrue);
+ Ops[2] = ConstantInt::get(Context, ProbFalse);
+ PBI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops));
+ } else {
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ }
+ } else {
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ }
+
// Copy any debug value intrinsics into the end of PredBlock.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
@@ -1894,8 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// Find the relevant condition and destinations.
Value *Condition = Select->getCondition();
- BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
- BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
// Perform the actual simplification.
return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
@@ -1979,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
- if (SI->findCaseValue(Cst) != 0) {
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
Value *V;
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
V = ConstantInt::getFalse(BB->getContext());
@@ -2235,52 +2350,6 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
return false;
}
-bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
- // Check to see if the first instruction in this block is just an unwind.
- // If so, replace any invoke instructions which use this as an exception
- // destination with call instructions.
- BasicBlock *BB = UI->getParent();
- if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
-
- bool Changed = false;
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.back();
- InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
- if (II && II->getUnwindDest() == BB) {
- // Insert a new branch instruction before the invoke, because this
- // is now a fall through.
- Builder.SetInsertPoint(II);
- BranchInst *BI = Builder.CreateBr(II->getNormalDest());
- Pred->getInstList().remove(II); // Take out of symbol table
-
- // Insert the call now.
- SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
- Builder.SetInsertPoint(BI);
- CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args, II->getName());
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call now does instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
-
- Preds.pop_back();
- }
-
- // If this block is now dead (and isn't the entry block), remove it.
- if (pred_begin(BB) == pred_end(BB) &&
- BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
- return true;
- }
-
- return Changed;
-}
-
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
@@ -2352,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == BB) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == BB) {
BB->removePredecessor(SI->getParent());
SI->removeCase(i);
--i; --e;
@@ -2361,14 +2431,15 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
// If the default value is unreachable, figure out the most popular
// destination and make it the default.
- if (SI->getSuccessor(0) == BB) {
+ if (SI->getDefaultDest() == BB) {
std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
- std::pair<unsigned, unsigned>& entry =
- Popularity[SI->getSuccessor(i)];
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ std::pair<unsigned, unsigned> &entry =
+ Popularity[i.getCaseSuccessor()];
if (entry.first == 0) {
entry.first = 1;
- entry.second = i;
+ entry.second = i.getCaseIndex();
} else {
entry.first++;
}
@@ -2390,7 +2461,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
if (MaxBlock) {
// Make this the new default, allowing us to delete any explicit
// edges to it.
- SI->setSuccessor(0, MaxBlock);
+ SI->setDefaultDest(MaxBlock);
Changed = true;
// If MaxBlock has phinodes in it, remove MaxPop-1 entries from
@@ -2399,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == MaxBlock) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == MaxBlock) {
SI->removeCase(i);
--i; --e;
}
@@ -2442,17 +2514,19 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
/// integer range comparison into a sub, an icmp and a branch.
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 2 && "Degenerate switch?");
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Make sure all cases point to the same destination and gather the values.
SmallVector<ConstantInt *, 16> Cases;
- Cases.push_back(SI->getCaseValue(1));
- for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
- if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ SwitchInst::CaseIt I = SI->case_begin();
+ Cases.push_back(I.getCaseValue());
+ SwitchInst::CaseIt PrevI = I++;
+ for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+ if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
return false;
- Cases.push_back(SI->getCaseValue(I));
+ Cases.push_back(I.getCaseValue());
}
- assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+ assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
// Sort the case values, then check if they form a range we can transform.
array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
@@ -2462,18 +2536,19 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
}
Constant *Offset = ConstantExpr::getNeg(Cases.back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
+ Builder.CreateCondBr(
+ Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
// Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
- for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
SI->eraseFromParent();
@@ -2487,24 +2562,26 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+ ComputeMaskedBits(Cond, KnownZero, KnownOne);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
- for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
- if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
- (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
- DeadCases.push_back(SI->getCaseValue(I));
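+  // A case is dead if its value has a bit set where Cond is known to be
+  // zero, or a bit clear where Cond is known to be one.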
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+ (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(I.getCaseValue());
DEBUG(dbgs() << "SimplifyCFG: switch case '"
- << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+ << I.getCaseValue() << "' is dead.\n");
}
}
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
- unsigned Case = SI->findCaseValue(DeadCases[I]);
+ SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+ assert(Case != SI->case_default() &&
+ "Case was not found. Probably mistake in DeadCases forming.");
// Prune unused values from PHI nodes.
- SI->getSuccessor(Case)->removePredecessor(SI->getParent());
+ Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
@@ -2553,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
- ConstantInt *CaseValue = SI->getCaseValue(I);
- BasicBlock *CaseDest = SI->getSuccessor(I);
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ ConstantInt *CaseValue = I.getCaseValue();
+ BasicBlock *CaseDest = I.getCaseSuccessor();
int PhiIndex;
PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
@@ -2676,8 +2753,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
- if (I->isTerminator()
- && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+ if (I->isTerminator() &&
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
return true;
}
@@ -2720,6 +2797,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, TD, Builder))
return true;
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the comparison into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
@@ -2754,12 +2837,6 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (FoldCondBranchOnPHI(BI, TD))
return SimplifyCFG(BB) | true;
- // If this basic block is ONLY a setcc and a branch, and if a predecessor
- // branches to us and one of our successors, fold the setcc into the
- // predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
-
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
@@ -2809,7 +2886,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
}
/// If BB has an incoming value that will always trigger undefined behavior
-/// (eg. null pointer derefence), remove the branch leading here.
+/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
for (BasicBlock::iterator i = BB->begin();
PHINode *PHI = dyn_cast<PHINode>(i); ++i)
@@ -2883,17 +2960,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
} else {
if (SimplifyCondBranch(BI, Builder)) return true;
}
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
- if (SimplifyResume(RI, Builder)) return true;
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder)) return true;
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder)) return true;
} else if (UnreachableInst *UI =
dyn_cast<UnreachableInst>(BB->getTerminator())) {
if (SimplifyUnreachable(UI)) return true;
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- if (SimplifyUnwind(UI, Builder)) return true;
} else if (IndirectBrInst *IBI =
dyn_cast<IndirectBrInst>(BB->getTerminator())) {
if (SimplifyIndirectBr(IBI)) return true;
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 76289c055b90..4030befaffa1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -46,7 +46,6 @@ namespace {
LoopInfo *LI;
DominatorTree *DT;
ScalarEvolution *SE;
- IVUsers *IU; // NULL for DisableIVRewrite
const TargetData *TD; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
@@ -59,7 +58,6 @@ namespace {
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
- IU(IVU),
TD(LPM->getAnalysisIfAvailable<TargetData>()),
DeadInsts(Dead),
Changed(false) {
@@ -107,8 +105,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
- if (IVOperand->getNumOperands() != 2 ||
- !isa<ConstantInt>(IVOperand->getOperand(1)))
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
return 0;
IVSrc = IVOperand->getOperand(0);
@@ -229,11 +227,6 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
Rem->replaceAllUsesWith(Sel);
}
- // Inform IVUsers about the new users.
- if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
- }
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
@@ -375,6 +368,8 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
namespace llvm {
+void IVVisitor::anchor() { }
+
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
@@ -397,36 +392,4 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
return Changed;
}
-/// simplifyIVUsers - Perform simplification on instructions recorded by the
-/// IVUsers pass.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyLoopIVs.
-bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead) {
- SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
-
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- SIV.eliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned);
- continue;
- }
- }
- }
- return SIV.hasChanged();
-}
-
} // namespace llvm
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f95b33a..81eb9e0f8ae1 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 46d4adaaa154..b1cad06dffe9 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,33 +50,13 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// return.
//
std::vector<BasicBlock*> ReturningBlocks;
- std::vector<BasicBlock*> UnwindingBlocks;
std::vector<BasicBlock*> UnreachableBlocks;
for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
if (isa<ReturnInst>(I->getTerminator()))
ReturningBlocks.push_back(I);
- else if (isa<UnwindInst>(I->getTerminator()))
- UnwindingBlocks.push_back(I);
else if (isa<UnreachableInst>(I->getTerminator()))
UnreachableBlocks.push_back(I);
- // Handle unwinding blocks first.
- if (UnwindingBlocks.empty()) {
- UnwindBlock = 0;
- } else if (UnwindingBlocks.size() == 1) {
- UnwindBlock = UnwindingBlocks.front();
- } else {
- UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
- new UnwindInst(F.getContext(), UnwindBlock);
-
- for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
- E = UnwindingBlocks.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- BB->getInstList().pop_back(); // Remove the unwind insn
- BranchInst::Create(UnwindBlock, BB);
- }
- }
-
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
UnreachableBlock = 0;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
new file mode 100644
index 000000000000..286b54f2f067
--- /dev/null
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -0,0 +1,1907 @@
+//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic-block vectorization pass. The algorithm was
+// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
+// et al. It works by looking for chains of pairable operations and then
+// pairing them.
+//
+//===----------------------------------------------------------------------===//
+
+#define BBV_NAME "bb-vectorize"
+#define DEBUG_TYPE BBV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Vectorize.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
+ cl::desc("The required chain depth for vectorization"));
+
+static cl::opt<unsigned>
+SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
+ cl::desc("The maximum search distance for instruction pairs"));
+
+static cl::opt<bool>
+SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
+ cl::desc("Replicating one element to a pair breaks the chain"));
+
+static cl::opt<unsigned>
+VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
+ cl::desc("The size of the native vector registers"));
+
+static cl::opt<unsigned>
+MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
+ cl::desc("The maximum number of pairing iterations"));
+
+static cl::opt<unsigned>
+MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
+ cl::desc("The maximum number of pairable instructions per group"));
+
+static cl::opt<unsigned>
+MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
+ cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
+ " a full cycle check"));
+
+static cl::opt<bool>
+NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize integer values"));
+
+static cl::opt<bool>
+NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point values"));
+
+static cl::opt<bool>
+NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize casting (conversion) operations"));
+
+static cl::opt<bool>
+NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point math intrinsics"));
+
+static cl::opt<bool>
+NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
+
+static cl::opt<bool>
+NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize loads and stores"));
+
+static cl::opt<bool>
+AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
+ cl::desc("Only generate aligned loads and stores"));
+
+static cl::opt<bool>
+NoMemOpBoost("bb-vectorize-no-mem-op-boost",
+ cl::init(false), cl::Hidden,
+ cl::desc("Don't boost the chain-depth contribution of loads and stores"));
+
+static cl::opt<bool>
+FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
+ cl::desc("Use a fast instruction dependency analysis"));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " instruction-examination process"));
+static cl::opt<bool>
+DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " candidate-selection process"));
+static cl::opt<bool>
+DebugPairSelection("bb-vectorize-debug-pair-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " pair-selection process"));
+static cl::opt<bool>
+DebugCycleCheck("bb-vectorize-debug-cycle-check",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " cycle-checking process"));
+#endif
+
+STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
+
+namespace {
+ struct BBVectorize : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ const VectorizeConfig Config;
+
+ BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+ : BasicBlockPass(ID), Config(C) {
+ initializeBBVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ BBVectorize(Pass *P, const VectorizeConfig &C)
+ : BasicBlockPass(ID), Config(C) {
+ AA = &P->getAnalysis<AliasAnalysis>();
+ SE = &P->getAnalysis<ScalarEvolution>();
+ TD = P->getAnalysisIfAvailable<TargetData>();
+ }
+
+ typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
+ typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+ typedef std::pair<std::multimap<Value *, Value *>::iterator,
+ std::multimap<Value *, Value *>::iterator> VPIteratorPair;
+ typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
+ std::multimap<ValuePair, ValuePair>::iterator>
+ VPPIteratorPair;
+
+ AliasAnalysis *AA;
+ ScalarEvolution *SE;
+ TargetData *TD;
+
+ // FIXME: const correct?
+
+ bool vectorizePairs(BasicBlock &BB);
+
+ bool getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts);
+
+ void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs);
+
+ void buildDepMap(BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ void fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
+
+ bool areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore);
+
+ bool trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers = true,
+ std::multimap<Value *, Value *> *LoadMoveSet = 0);
+
+ void computePairsConnectedTo(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ ValuePair P);
+
+ bool pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
+
+ bool pairWillFormCycle(ValuePair P,
+ std::multimap<ValuePair, ValuePair> &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree,
+ DenseSet<ValuePair> &PrunedTree, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree, ValuePair J);
+
+ void findBestTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
+ size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ bool UseCycleCheck);
+
+ Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool &FlipMemInputs);
+
+ void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
+ unsigned IdxOffset, std::vector<Constant*> &Mask);
+
+ Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
+ Instruction *J);
+
+ Value *getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool FlipMemInputs);
+
+ void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
+ bool &FlipMemInputs);
+
+ void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt, Instruction *&K1,
+ Instruction *&K2, bool &FlipMemInputs);
+
+ void collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I);
+
+ void collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet);
+
+ bool canMoveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I, Instruction *J);
+
+ void moveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J);
+
+ bool vectorizeBB(BasicBlock &BB) {
+ bool changed = false;
+ // Iterate a sufficient number of times to merge types of size 1 bit,
+ // then 2 bits, then 4, etc. up to half of the width of the target
+ // vector register.
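+ // For example, if Config.VectorBits is 128 (the default of the
+ // -bb-vectorize-vector-bits option), this loop runs for v = 2, 4, ..., 128
+ // (at most 7 iterations), or until Config.MaxIter (if nonzero) iterations
+ // have been performed.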
+ for (unsigned v = 2, n = 1;
+ v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
+ v *= 2, ++n) {
+ DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+ " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (vectorizePairs(BB))
+ changed = true;
+ else
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: done!\n");
+ return changed;
+ }
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ return vectorizeBB(BB);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.setPreservesCFG();
+ }
+
+ // This returns the vector type that holds a pair of the provided type.
+ // If the provided type is already a vector, then its length is doubled.
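+ // For example, a scalar i32 becomes <2 x i32>, and a <2 x float> vector
+ // becomes <4 x float>.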
+ static inline VectorType *getVecTypeForPair(Type *ElemTy) {
+ if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
+ unsigned numElem = VTy->getNumElements();
+ return VectorType::get(ElemTy->getScalarType(), numElem*2);
+ }
+
+ return VectorType::get(ElemTy, 2);
+ }
+
+ // Returns the weight associated with the provided value. A chain of
+ // candidate pairs has a length given by the sum of the weights of its
+ // members (one weight per pair; the weight of each member of the pair
+ // is assumed to be the same). This length is then compared to the
+ // chain-length threshold to determine if a given chain is significant
+ // enough to be vectorized. The length is also used in comparing
+ // candidate chains where longer chains are considered to be better.
+ // Note: when this function returns 0, the resulting instructions are
+ // not actually fused.
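+ // For example, if Config.ReqChainDepth is 6 (the default of the
+ // -bb-vectorize-req-chain-depth option) and the memory-operation boost
+ // below is enabled (also the default), a paired load feeding a paired
+ // store contributes 3 + 3 = 6 and so meets the threshold by itself.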
+ inline size_t getDepthFactor(Value *V) {
+ // InsertElement and ExtractElement have a depth factor of zero. This is
+ // for two reasons: First, they cannot be usefully fused. Second, because
+ // the pass generates a lot of these, they can confuse the simple metric
+ // used to compare the trees in the next iteration. Thus, giving them a
+ // weight of zero allows the pass to essentially ignore them in
+ // subsequent iterations when looking for vectorization opportunities
+ // while still tracking dependency chains that flow through those
+ // instructions.
+ if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
+ return 0;
+
+ // Give a load or store half of the required depth so that load/store
+ // pairs will vectorize.
+ if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+ return Config.ReqChainDepth/2;
+
+ return 1;
+ }
+
+ // This determines the relative offset, in elements, of two loads or stores,
+ // returning true if the offset is a compile-time constant that is a whole
+ // number of elements.
+ // For example, if OffsetInElmts == 1, then J accesses the memory directly
+ // after I; if OffsetInElmts == -1 then I accesses the memory
+ // directly after J. This function assumes that both instructions
+ // have the same type.
+ bool getPairPtrInfo(Instruction *I, Instruction *J,
+ Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+ int64_t &OffsetInElmts) {
+ OffsetInElmts = 0;
+ if (isa<LoadInst>(I)) {
+ IPtr = cast<LoadInst>(I)->getPointerOperand();
+ JPtr = cast<LoadInst>(J)->getPointerOperand();
+ IAlignment = cast<LoadInst>(I)->getAlignment();
+ JAlignment = cast<LoadInst>(J)->getAlignment();
+ } else {
+ IPtr = cast<StoreInst>(I)->getPointerOperand();
+ JPtr = cast<StoreInst>(J)->getPointerOperand();
+ IAlignment = cast<StoreInst>(I)->getAlignment();
+ JAlignment = cast<StoreInst>(J)->getAlignment();
+ }
+
+ const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
+ const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
+
+ // If this is a trivial offset, then we'll get something like
+ // 1*sizeof(type). With target data, which we need anyway, this will get
+ // constant folded into a number.
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
+ if (const SCEVConstant *ConstOffSCEV =
+ dyn_cast<SCEVConstant>(OffsetSCEV)) {
+ ConstantInt *IntOff = ConstOffSCEV->getValue();
+ int64_t Offset = IntOff->getSExtValue();
+
+ Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
+
+ assert(VTy == cast<PointerType>(JPtr->getType())->getElementType());
+
+ OffsetInElmts = Offset/VTyTSS;
+ return (abs64(Offset) % VTyTSS) == 0;
+ }
+
+ return false;
+ }
+
+ // Returns true if the provided CallInst represents an intrinsic that can
+ // be vectorized.
+ bool isVectorizableIntrinsic(CallInst* I) {
+ Function *F = I->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ if (!IID) return false;
+
+ switch(IID) {
+ default:
+ return false;
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ return Config.VectorizeMath;
+ case Intrinsic::fma:
+ return Config.VectorizeFMA;
+ }
+ }
+
+ // Returns true if J appears as the second element of some pair in the
+ // range referenced by the provided multimap iterator pair.
+ template <typename V>
+ bool isSecondInIteratorPair(V J, std::pair<
+ typename std::multimap<V, V>::iterator,
+ typename std::multimap<V, V>::iterator> PairRange) {
+ for (typename std::multimap<V, V>::iterator K = PairRange.first;
+ K != PairRange.second; ++K)
+ if (K->second == J) return true;
+
+ return false;
+ }
+ };
+
+ // This function implements one vectorization iteration on the provided
+ // basic block. It returns true if the block is changed.
+ bool BBVectorize::vectorizePairs(BasicBlock &BB) {
+ bool ShouldContinue;
+ BasicBlock::iterator Start = BB.getFirstInsertionPt();
+
+ std::vector<Value *> AllPairableInsts;
+ DenseMap<Value *, Value *> AllChosenPairs;
+
+ do {
+ std::vector<Value *> PairableInsts;
+ std::multimap<Value *, Value *> CandidatePairs;
+ ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
+ PairableInsts);
+ if (PairableInsts.empty()) continue;
+
+ // Now we have a map of all of the pairable instructions and we need to
+ // select the best possible pairing. A good pairing is one such that the
+ // users of the pair are also paired. This defines a (directed) forest
+ // over the pairs such that two pairs are connected iff the second pair
+ // uses the first.
+
+ // Note that it only matters that both members of the second pair use some
+ // element of the first pair (to allow for splatting).
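+ // For example, if <a1, a2> is a candidate pair and both members of another
+ // candidate pair <b1, b2> use a1 (even if neither uses a2), the two pairs
+ // are still treated as connected.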
+
+ std::multimap<ValuePair, ValuePair> ConnectedPairs;
+ computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs);
+ if (ConnectedPairs.empty()) continue;
+
+ // Build the pairable-instruction dependency map
+ DenseSet<ValuePair> PairableInstUsers;
+ buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
+
+ // There is now a graph of the connected pairs. For each variable, pick
+ // the pairing with the largest tree meeting the depth requirement on at
+ // least one branch. Then select all pairings that are part of that tree
+ // and remove them from the list of available pairings and pairable
+ // variables.
+
+ DenseMap<Value *, Value *> ChosenPairs;
+ choosePairs(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs);
+
+ if (ChosenPairs.empty()) continue;
+ AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
+ PairableInsts.end());
+ AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
+ } while (ShouldContinue);
+
+ if (AllChosenPairs.empty()) return false;
+ NumFusedOps += AllChosenPairs.size();
+
+ // A set of pairs has now been selected. It is now necessary to replace the
+ // paired instructions with vector instructions. For this procedure each
+ // operand must be replaced with a vector operand. This vector is formed
+ // by using build_vector on the old operands. The original values are then
+ // replaced with extracts (vector_extract) from the result. Subsequent
+ // optimization passes should coalesce the build/extract combinations.
+
+ fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs);
+ return true;
+ }
+
+ // This function returns true if the provided instruction is capable of being
+ // fused into a vector instruction. This determination is based only on the
+ // type and other attributes of the instruction.
+ bool BBVectorize::isInstVectorizable(Instruction *I,
+ bool &IsSimpleLoadStore) {
+ IsSimpleLoadStore = false;
+
+ if (CallInst *C = dyn_cast<CallInst>(I)) {
+ if (!isVectorizableIntrinsic(C))
+ return false;
+ } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Vectorize simple loads if possible:
+ IsSimpleLoadStore = L->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ // Vectorize simple stores if possible:
+ IsSimpleLoadStore = S->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (CastInst *C = dyn_cast<CastInst>(I)) {
+ // We can vectorize casts, but not casts of pointer types, etc.
+ if (!Config.VectorizeCasts)
+ return false;
+
+ Type *SrcTy = C->getSrcTy();
+ if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
+ return false;
+
+ Type *DestTy = C->getDestTy();
+ if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
+ return false;
+ } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
+ isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
+ return false;
+ }
+
+ // We can't vectorize memory operations without target data
+ if (TD == 0 && IsSimpleLoadStore)
+ return false;
+
+ Type *T1, *T2;
+ if (isa<StoreInst>(I)) {
+ // For stores, it is the value type, not the pointer type, that matters
+ // because the value is what will come from a vector register.
+
+ Value *IVal = cast<StoreInst>(I)->getValueOperand();
+ T1 = IVal->getType();
+ } else {
+ T1 = I->getType();
+ }
+
+ if (I->isCast())
+ T2 = cast<CastInst>(I)->getSrcTy();
+ else
+ T2 = T1;
+
+ // Not every type can be vectorized...
+ if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
+ !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
+ return false;
+
+ if (!Config.VectorizeInts
+ && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+ return false;
+
+ if (!Config.VectorizeFloats
+ && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+ return false;
+
+ if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+ T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
+ return false;
+
+ return true;
+ }
+
+ // This function returns true if the two provided instructions are compatible
+ // (meaning that they can be fused into a vector instruction). This assumes
+ // that I has already been determined to be vectorizable and that J is not
+ // in the use tree of I.
+ bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore) {
+ DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
+ " <-> " << *J << "\n");
+
+ // Loads and stores can be merged if they have different alignments,
+ // but are otherwise the same.
+ LoadInst *LI, *LJ;
+ StoreInst *SI, *SJ;
+ if ((LI = dyn_cast<LoadInst>(I)) && (LJ = dyn_cast<LoadInst>(J))) {
+ if (I->getType() != J->getType())
+ return false;
+
+ if (LI->getPointerOperand()->getType() !=
+ LJ->getPointerOperand()->getType() ||
+ LI->isVolatile() != LJ->isVolatile() ||
+ LI->getOrdering() != LJ->getOrdering() ||
+ LI->getSynchScope() != LJ->getSynchScope())
+ return false;
+ } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
+ if (SI->getValueOperand()->getType() !=
+ SJ->getValueOperand()->getType() ||
+ SI->getPointerOperand()->getType() !=
+ SJ->getPointerOperand()->getType() ||
+ SI->isVolatile() != SJ->isVolatile() ||
+ SI->getOrdering() != SJ->getOrdering() ||
+ SI->getSynchScope() != SJ->getSynchScope())
+ return false;
+ } else if (!J->isSameOperationAs(I)) {
+ return false;
+ }
+ // FIXME: handle addsub-type operations!
+
+ if (IsSimpleLoadStore) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts = 0;
+ if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ if (Config.AlignedOnly) {
+ Type *aType = isa<StoreInst>(I) ?
+ cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+ // An aligned load or store is possible only if the instruction
+ // with the lower offset has an alignment suitable for the
+ // vector type.
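+ // For example, on a target whose preferred alignment for <2 x float>
+ // is 8 bytes, two adjacent float loads are fused in aligned-only mode
+ // only when the lower-addressed load has an alignment of at least 8.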
+
+ unsigned BottomAlignment = IAlignment;
+ if (OffsetInElmts < 0) BottomAlignment = JAlignment;
+
+ Type *VType = getVecTypeForPair(aType);
+ unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
+ if (BottomAlignment < VecAlignment)
+ return false;
+ }
+ } else {
+ return false;
+ }
+ } else if (isa<ShuffleVectorInst>(I)) {
+ // Only merge two shuffles if they're both constant
+ return isa<Constant>(I->getOperand(2)) &&
+ isa<Constant>(J->getOperand(2));
+ // FIXME: We may want to vectorize non-constant shuffles also.
+ }
+
+ // The powi intrinsic is special because only the first argument is
+ // vectorized; the second arguments of I and J must be equal.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ Function *FI;
+ if (CI && (FI = CI->getCalledFunction()) &&
+ FI->getIntrinsicID() == Intrinsic::powi) {
+
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ return true;
+ }
+
+ // Figure out whether or not J uses I and update the users and write-set
+ // structures associated with I. Specifically, Users represents the set of
+ // instructions that depend on I. WriteSet represents the set of memory
+ // locations written by those instructions. If UpdateUsers is true,
+ // and J uses I, then Users is updated to contain J and WriteSet is updated
+ // to contain any memory locations to which J writes. The function returns
+ // true if J uses I. By default, alias analysis is used to determine
+ // whether J reads from memory that overlaps with a location in WriteSet.
+ // If LoadMoveSet is not null, then it is a previously-computed multimap
+ // where the key is the memory-based user instruction and the value is
+ // the instruction to be compared with I. So, if LoadMoveSet is provided,
+ // then the alias analysis is not used. This is necessary because this
+ // function is called during the process of moving instructions during
+ // vectorization and the results of the alias analysis are not stable during
+ // that process.
+ bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers,
+ std::multimap<Value *, Value *> *LoadMoveSet) {
+ bool UsesI = false;
+
+ // This instruction may already be marked as a user due, for example, to
+ // being a member of a selected pair.
+ if (Users.count(J))
+ UsesI = true;
+
+ if (!UsesI)
+ for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
+ JU != JE; ++JU) {
+ Value *V = *JU;
+ if (I == V || Users.count(V)) {
+ UsesI = true;
+ break;
+ }
+ }
+ if (!UsesI && J->mayReadFromMemory()) {
+ if (LoadMoveSet) {
+ VPIteratorPair JPairRange = LoadMoveSet->equal_range(J);
+ UsesI = isSecondInIteratorPair<Value*>(I, JPairRange);
+ } else {
+ for (AliasSetTracker::iterator W = WriteSet.begin(),
+ WE = WriteSet.end(); W != WE; ++W) {
+ if (W->aliasesUnknownInst(J, *AA)) {
+ UsesI = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (UsesI && UpdateUsers) {
+ if (J->mayWriteToMemory()) WriteSet.add(J);
+ Users.insert(J);
+ }
+
+ return UsesI;
+ }
+
+ // This function iterates over all instruction pairs in the provided
+ // basic block and collects all candidate pairs for vectorization.
+ bool BBVectorize::getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts) {
+ BasicBlock::iterator E = BB.end();
+ if (Start == E) return false;
+
+ bool ShouldContinue = false, IAfterStart = false;
+ for (BasicBlock::iterator I = Start++; I != E; ++I) {
+ if (I == Start) IAfterStart = true;
+
+ bool IsSimpleLoadStore;
+ if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
+
+ // Look for an instruction with which to pair instruction *I...
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ bool JAfterStart = IAfterStart;
+ BasicBlock::iterator J = llvm::next(I);
+ for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
+ if (J == Start) JAfterStart = true;
+
+ // Determine if J uses I, if so, exit the loop.
+ bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ if (Config.FastDep) {
+ // Note: For this heuristic to be effective, independent operations
+ // must tend to be intermixed. This is likely to be the case after some
+ // kinds of grouped loop unrolling (but not the generic LLVM pass),
+ // but otherwise may require some kind of reordering pass.
+
+ // When using fast dependency analysis,
+ // stop searching after first use:
+ if (UsesI) break;
+ } else {
+ if (UsesI) continue;
+ }
+
+ // J does not use I, and comes before the first use of I, so it can be
+ // merged with I if the instructions are compatible.
+ if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue;
+
+ // J is a candidate for merging with I.
+ if (!PairableInsts.size() ||
+ PairableInsts[PairableInsts.size()-1] != I) {
+ PairableInsts.push_back(I);
+ }
+
+ CandidatePairs.insert(ValuePair(I, J));
+
+ // The next call to this function must start after the last instruction
+ // selected during this invocation.
+ if (JAfterStart) {
+ Start = llvm::next(J);
+ IAfterStart = JAfterStart = false;
+ }
+
+ DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
+ << *I << " <-> " << *J << "\n");
+
+ // If we have already found too many pairs, break here and this function
+ // will be called again starting after the last instruction selected
+ // during this invocation.
+ if (PairableInsts.size() >= Config.MaxInsts) {
+ ShouldContinue = true;
+ break;
+ }
+ }
+
+ if (ShouldContinue)
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
+ << " instructions with candidate pairs\n");
+
+ return ShouldContinue;
+ }
+
+ // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
+ // it looks for pairs such that both members have an input which is an
+ // output of PI or PJ.
+ void BBVectorize::computePairsConnectedTo(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ ValuePair P) {
+ // For each possible pairing for this variable, look at the uses of
+ // the first value...
+ for (Value::use_iterator I = P.first->use_begin(),
+ E = P.first->use_end(); I != E; ++I) {
+ VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
+
+ // For each use of the first variable, look for uses of the second
+ // variable...
+ for (Value::use_iterator J = P.second->use_begin(),
+ E2 = P.second->use_end(); J != E2; ++J) {
+ VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
+
+ // Look for <I, J>:
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+
+ // Look for <J, I>:
+ if (isSecondInIteratorPair<Value*>(*I, JPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
+ }
+
+ if (Config.SplatBreaksChain) continue;
+ // Look for cases where just the first value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ }
+ }
+
+ if (Config.SplatBreaksChain) return;
+ // Look for cases where just the second value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator I = P.second->use_begin(),
+ E = P.second->use_end(); I != E; ++I) {
+ VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
+
+ for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ }
+ }
+ }
+
+ // This function figures out which pairs are connected. Two pairs are
+ // connected if some output of the first pair forms an input to both members
+ // of the second pair.
+ void BBVectorize::computeConnectedPairs(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs) {
+
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PE = PairableInsts.end(); PI != PE; ++PI) {
+ VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
+
+ for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
+ P != choiceRange.second; ++P)
+ computePairsConnectedTo(CandidatePairs, PairableInsts,
+ ConnectedPairs, *P);
+ }
+
+ DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
+ << " pair connections.\n");
+ }
+
+ // This function builds a set of use tuples such that <A, B> is in the set
+ // if B is in the use tree of A (i.e., B depends on the output of A).
+ void BBVectorize::buildDepMap(
+ BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers) {
+ DenseSet<Value *> IsInPair;
+ for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
+ E = CandidatePairs.end(); C != E; ++C) {
+ IsInPair.insert(C->first);
+ IsInPair.insert(C->second);
+ }
+
+ // Iterate through the basic block, recording all Users of each
+ // pairable instruction.
+
+ BasicBlock::iterator E = BB.end();
+ for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
+ if (IsInPair.find(I) == IsInPair.end()) continue;
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
+ (void) trackUsesOfI(Users, WriteSet, I, J);
+
+ for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
+ U != E; ++U)
+ PairableInstUsers.insert(ValuePair(I, *U));
+ }
+ }
+
+ // Returns true if an input to pair P is an output of pair Q and also an
+ // input of pair Q is an output of pair P. If this is the case, then these
+ // two pairs cannot be simultaneously fused.
+ bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> *PairableInstUserMap) {
+ // Two pairs conflict if each is a user of the other.
+ bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.second));
+ bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.second));
+ if (PairableInstUserMap) {
+ // FIXME: The expensive part of the cycle check is not so much the cycle
+ // check itself but this edge insertion procedure. This needs some
+ // profiling and probably a different data structure (same is true of
+ // most uses of std::multimap).
+ if (PUsesQ) {
+ VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q);
+ if (!isSecondInIteratorPair(P, QPairRange))
+ PairableInstUserMap->insert(VPPair(Q, P));
+ }
+ if (QUsesP) {
+ VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P);
+ if (!isSecondInIteratorPair(Q, PPairRange))
+ PairableInstUserMap->insert(VPPair(P, Q));
+ }
+ }
+
+ return (QUsesP && PUsesQ);
+ }
+
+ // This function walks the use graph of current pairs to see if, starting
+ // from P, the walk returns to P.
+ bool BBVectorize::pairWillFormCycle(ValuePair P,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseSet<ValuePair> &CurrentPairs) {
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
+ << *P.second << "\n");
+ // A lookup table of visited pairs is kept because the PairableInstUserMap
+ // contains non-direct associations.
+ DenseSet<ValuePair> Visited;
+ SmallVector<ValuePair, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(P);
+ do {
+ ValuePair QTop = Q.pop_back_val();
+ Visited.insert(QTop);
+
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
+ << *QTop.second << "\n");
+ VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
+ for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
+ C != QPairRange.second; ++C) {
+ if (C->second == P) {
+ DEBUG(dbgs()
+ << "BBV: rejected to prevent non-trivial cycle formation: "
+ << *C->first.first << " <-> " << *C->first.second << "\n");
+ return true;
+ }
+
+ if (CurrentPairs.count(C->second) && !Visited.count(C->second))
+ Q.push_back(C->second);
+ }
+ } while (!Q.empty());
+
+ return false;
+ }
+
+ // This function builds the initial tree of connected pairs with the
+ // pair J at the root.
+ void BBVectorize::buildInitialTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
+ // Each of these pairs is viewed as the root node of a Tree. The Tree
+ // is then walked (depth-first). As this happens, we keep track of
+ // the pairs that compose the Tree and the maximum depth of the Tree.
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.back();
+
+ // Push each child onto the queue:
+ bool MoreChildren = false;
+ size_t MaxChildDepth = QTop.second;
+ VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
+ for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first;
+ k != qtRange.second; ++k) {
+ // Make sure that this child pair is still a candidate:
+ bool IsStillCand = false;
+ VPIteratorPair checkRange =
+ CandidatePairs.equal_range(k->second.first);
+ for (std::multimap<Value *, Value *>::iterator m = checkRange.first;
+ m != checkRange.second; ++m) {
+ if (m->second == k->second.second) {
+ IsStillCand = true;
+ break;
+ }
+ }
+
+ if (IsStillCand) {
+ DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
+ if (C == Tree.end()) {
+ size_t d = getDepthFactor(k->second.first);
+ Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
+ MoreChildren = true;
+ } else {
+ MaxChildDepth = std::max(MaxChildDepth, C->second);
+ }
+ }
+ }
+
+ if (!MoreChildren) {
+ // Record the current pair as part of the Tree:
+ Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
+ Q.pop_back();
+ }
+ } while (!Q.empty());
+ }
+
+ // Given some initial tree, prune it by removing conflicting pairs (pairs
+ // that cannot be simultaneously chosen for vectorization).
+ void BBVectorize::pruneTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree,
+ DenseSet<ValuePair> &PrunedTree, ValuePair J,
+ bool UseCycleCheck) {
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.pop_back_val();
+ PrunedTree.insert(QTop.first);
+
+ // Visit each child, pruning as necessary...
+ DenseMap<ValuePair, size_t> BestChildren;
+ VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
+ for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
+ K != QTopRange.second; ++K) {
+ DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
+ if (C == Tree.end()) continue;
+
+ // This child is in the Tree, now we need to make sure it is the
+ // best of any conflicting children. There could be multiple
+ // conflicting children, so first, determine if we're keeping
+ // this child, then delete conflicting children as necessary.
+
+ // It is also necessary to guard against pairing-induced
+ // dependencies. Consider instructions a .. x .. y .. b
+ // such that (a,b) are to be fused and (x,y) are to be fused
+ // but a is an input to x and y is an input to b. This
+ // means that y cannot be moved after b, but x must be moved
+ // after b for (a,b) to be fused. In other words, after
+ // fusing (a,b) we have y .. a/b .. x where y is an input
+ // to a/b and x is a user of a/b: x and y can no longer
+ // be legally fused. To prevent this condition, we must
+ // make sure that a child pair added to the Tree is not
+ // both an input and output of an already-selected pair.
+
+ // Pairing-induced dependencies can also form from more complicated
+ // cycles. The pair vs. pair conflicts are easy to check, and so
+ // that is done explicitly for "fast rejection". In addition, for
+ // child vs. child conflicts, we may prefer to keep the current
+ // pair in preference to the already-selected child.
+ DenseSet<ValuePair> CurrentPairs;
+
+ bool CanAdd = true;
+ for (DenseMap<ValuePair, size_t>::iterator C2
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ if (C2->second >= C->second) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ }
+ if (!CanAdd) continue;
+
+ // Even worse, this child could conflict with another node already
+ // selected for the Tree. If that is the case, ignore this child.
+ for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
+ E2 = PrunedTree.end(); T != E2; ++T) {
+ if (T->first == C->first.first ||
+ T->first == C->first.second ||
+ T->second == C->first.first ||
+ T->second == C->first.second ||
+ pairsConflict(*T, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*T);
+ }
+ if (!CanAdd) continue;
+
+ // And check the queue too...
+ for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
+ E2 = Q.end(); C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ if (!CanAdd) continue;
+
+ // Last but not least, check for a conflict with any of the
+ // already-chosen pairs.
+ for (DenseMap<Value *, Value *>::iterator C2 =
+ ChosenPairs.begin(), E2 = ChosenPairs.end();
+ C2 != E2; ++C2) {
+ if (pairsConflict(*C2, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*C2);
+ }
+ if (!CanAdd) continue;
+
+ // To check for non-trivial cycles formed by the addition of the
+ // current pair, we have formed a list of all relevant pairs; now use a
+ // graph walk to check for a cycle. We start from the current pair and
+ // walk the use tree to see if we again reach the current pair. If we
+ // do, then the current pair is rejected.
+
+ // FIXME: It may be more efficient to use a topological-ordering
+ // algorithm to improve the cycle check. This should be investigated.
+ if (UseCycleCheck &&
+ pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
+ continue;
+
+ // This child can be added, but we may have chosen it in preference
+ // to an already-selected child. Check for this here, and if a
+ // conflict is found, then remove the previously-selected child
+ // before adding this one in its place.
+ for (DenseMap<ValuePair, size_t>::iterator C2
+ = BestChildren.begin(); C2 != BestChildren.end();) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers))
+ BestChildren.erase(C2++);
+ else
+ ++C2;
+ }
+
+ BestChildren.insert(ValuePairWithDepth(C->first, C->second));
+ }
+
+ for (DenseMap<ValuePair, size_t>::iterator C
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C != E2; ++C) {
+ size_t DepthF = getDepthFactor(C->first.first);
+ Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
+ }
+ } while (!Q.empty());
+ }
+
+ // This function finds the best tree of mutually-compatible connected
+ // pairs, given the choice of root pairs as an iterator range.
+ void BBVectorize::findBestTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
+ size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ bool UseCycleCheck) {
+ for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
+ J != ChoiceRange.second; ++J) {
+
+ // Before going any further, make sure that this pair does not
+ // conflict with any already-selected pairs (see comment below
+ // near the Tree pruning for more details).
+ DenseSet<ValuePair> ChosenPairSet;
+ bool DoesConflict = false;
+ for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
+ E = ChosenPairs.end(); C != E; ++C) {
+ if (pairsConflict(*C, *J, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ DoesConflict = true;
+ break;
+ }
+
+ ChosenPairSet.insert(*C);
+ }
+ if (DoesConflict) continue;
+
+ if (UseCycleCheck &&
+ pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet))
+ continue;
+
+ DenseMap<ValuePair, size_t> Tree;
+ buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs, Tree, *J);
+
+ // Because pruning keeps the child with the largest depth, the maximum
+ // depth of the pruned Tree is the same as that of the unpruned Tree.
+ size_t MaxDepth = Tree.lookup(*J);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {"
+ << *J->first << " <-> " << *J->second << "} of depth " <<
+ MaxDepth << " and size " << Tree.size() << "\n");
+
+ // At this point the Tree has been constructed, but may contain
+ // contradictory children (meaning that different children of
+ // some tree node may be attempting to fuse the same instruction).
+ // So now we walk the tree again and, in the case of a conflict,
+ // keep only the child with the largest depth. To break a tie,
+ // favor the first child.
+
+ DenseSet<ValuePair> PrunedTree;
+ pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
+ PrunedTree, *J, UseCycleCheck);
+
+ size_t EffSize = 0;
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S)
+ EffSize += getDepthFactor(S->first);
+
+ DEBUG(if (DebugPairSelection)
+ dbgs() << "BBV: found pruned Tree for pair {"
+ << *J->first << " <-> " << *J->second << "} of depth " <<
+ MaxDepth << " and size " << PrunedTree.size() <<
+ " (effective size: " << EffSize << ")\n");
+ if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
+ BestMaxDepth = MaxDepth;
+ BestEffSize = EffSize;
+ BestTree = PrunedTree;
+ }
+ }
+ }
+
+ // Given the list of candidate pairs, this function selects those
+ // that will be fused into vector instructions.
+ void BBVectorize::choosePairs(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs) {
+ bool UseCycleCheck =
+ CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
+ std::multimap<ValuePair, ValuePair> PairableInstUserMap;
+ for (std::vector<Value *>::iterator I = PairableInsts.begin(),
+ E = PairableInsts.end(); I != E; ++I) {
+ // The number of possible pairings for this variable:
+ size_t NumChoices = CandidatePairs.count(*I);
+ if (!NumChoices) continue;
+
+ VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
+
+ // The best pair to choose and its tree:
+ size_t BestMaxDepth = 0, BestEffSize = 0;
+ DenseSet<ValuePair> BestTree;
+ findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap, ChosenPairs,
+ BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
+ UseCycleCheck);
+
+ // A tree has been chosen (or not) at this point. If no tree was
+ // chosen, then this instruction, I, cannot be paired (and is no longer
+ // considered).
+
+ DEBUG(if (BestTree.size() > 0)
+ dbgs() << "BBV: selected pairs in the best tree for: "
+ << *cast<Instruction>(*I) << "\n");
+
+ for (DenseSet<ValuePair>::iterator S = BestTree.begin(),
+ SE2 = BestTree.end(); S != SE2; ++S) {
+ // Insert the members of this tree into the list of chosen pairs.
+ ChosenPairs.insert(ValuePair(S->first, S->second));
+ DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
+ *S->second << "\n");
+
+ // Remove all candidate pairs that have values in the chosen tree.
+ for (std::multimap<Value *, Value *>::iterator K =
+ CandidatePairs.begin(); K != CandidatePairs.end();) {
+ if (K->first == S->first || K->second == S->first ||
+ K->second == S->second || K->first == S->second) {
+ // Don't remove the actual pair chosen so that it can be used
+ // in subsequent tree selections.
+ if (!(K->first == S->first && K->second == S->second))
+ CandidatePairs.erase(K++);
+ else
+ ++K;
+ } else {
+ ++K;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
+ }
+
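+ // Builds a name for a replacement value derived from instruction I: for an
+ // instruction named "x", input 0 is named "x.v.i0" and output 1 is named
+ // "x.v.r1" (with ".n" appended when n > 0); unnamed instructions yield "".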
+ std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
+ unsigned n = 0) {
+ if (!I->hasName())
+ return "";
+
+ return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
+ (n > 0 ? "." + utostr(n) : "")).str();
+ }
+
+ // Returns the value that is to be used as the pointer input to the vector
+ // instruction that fuses I with J.
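+ // For example, if I loads from %p and J loads from the element immediately
+ // after it, the fused load reads through %p bitcast to a pointer to the
+ // paired vector type.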
+ Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
+ Instruction *I, Instruction *J, unsigned o,
+ bool &FlipMemInputs) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts;
+ (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts);
+
+ // The pointer value is taken to be the one with the lowest offset.
+ Value *VPtr;
+ if (OffsetInElmts > 0) {
+ VPtr = IPtr;
+ } else {
+ FlipMemInputs = true;
+ VPtr = JPtr;
+ }
+
+ Type *ArgType = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+ Type *VArgPtrType = PointerType::get(VArgType,
+ cast<PointerType>(IPtr->getType())->getAddressSpace());
+ return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
+ /* insert before */ FlipMemInputs ? J : I);
+ }
+
+ void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
+ unsigned IdxOffset, std::vector<Constant*> &Mask) {
+ for (unsigned v = 0; v < NumElem/2; ++v) {
+ int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
+ if (m < 0) {
+ Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
+ } else {
+ unsigned mm = m + (int) IdxOffset;
+ if (m >= (int) NumInElem)
+ mm += (int) NumInElem;
+
+ Mask[v+MaskOffset] =
+ ConstantInt::get(Type::getInt32Ty(Context), mm);
+ }
+ }
+ }
+
+ // Returns the value that is to be used as the vector-shuffle mask to the
+ // vector instruction that fuses I with J.
+ Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
+ Instruction *I, Instruction *J) {
+ // This is the shuffle mask. We need to append the second
+ // mask to the first, and the numbers need to be adjusted.
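+ // For example, when fusing two shuffles whose operands are <2 x float>
+ // vectors and whose masks are both <1, 0>, the resulting <4 x float>
+ // shuffle uses the mask <1, 0, 3, 2>.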
+
+ Type *ArgType = I->getType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+
+ // Get the total number of elements in the fused vector type.
+ // By definition, this must equal the number of elements in
+ // the final mask.
+ unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(NumElem);
+
+ Type *OpType = I->getOperand(0)->getType();
+ unsigned NumInElem = cast<VectorType>(OpType)->getNumElements();
+
+ // For the mask from the first pair...
+ fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask);
+
+ // For the mask from the second pair...
+ fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem,
+ Mask);
+
+ return ConstantVector::get(Mask);
+ }
+
+ // Returns the value to be used as the specified operand of the vector
+ // instruction that fuses I with J.
+ Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool FlipMemInputs) {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ // Compute the fused vector type for this operand
+ Type *ArgType = I->getOperand(o)->getType();
+ VectorType *VArgType = getVecTypeForPair(ArgType);
+
+ Instruction *L = I, *H = J;
+ if (FlipMemInputs) {
+ L = J;
+ H = I;
+ }
+
+ if (ArgType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+
+ Instruction *BV = new ShuffleVectorInst(L->getOperand(o),
+ H->getOperand(o),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+ // If these two inputs are outputs of another vector instruction, then we
+ // should use that vector directly. It might be necessary to
+ // permute it first. (When pairings are fused recursively, you can
+ // end up with cases where a large vector is decomposed into scalars
+ // using extractelement instructions, then built into size-2
+ // vectors using insertelement and then into larger vectors using
+ // shuffles. InstCombine does not simplify all of these cases well,
+ // and so we make sure that shuffles are generated here when possible.)
+ ExtractElementInst *LEE
+ = dyn_cast<ExtractElementInst>(L->getOperand(o));
+ ExtractElementInst *HEE
+ = dyn_cast<ExtractElementInst>(H->getOperand(o));
+
+ if (LEE && HEE &&
+ LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) {
+ VectorType *EEType = cast<VectorType>(LEE->getOperand(0)->getType());
+ unsigned LowIndx = cast<ConstantInt>(LEE->getOperand(1))->getZExtValue();
+ unsigned HighIndx = cast<ConstantInt>(HEE->getOperand(1))->getZExtValue();
+ if (LEE->getOperand(0) == HEE->getOperand(0)) {
+ if (LowIndx == 0 && HighIndx == 1)
+ return LEE->getOperand(0);
+
+ std::vector<Constant*> Mask(2);
+ Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
+ Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
+
+ Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
+ UndefValue::get(EEType),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+ std::vector<Constant*> Mask(2);
+ HighIndx += EEType->getNumElements();
+ Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
+ Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
+
+ Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
+ HEE->getOperand(0),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+ Instruction *BV1 = InsertElementInst::Create(
+ UndefValue::get(VArgType),
+ L->getOperand(o), CV0,
+ getReplacementName(I, true, o, 1));
+ BV1->insertBefore(I);
+ Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o),
+ CV1,
+ getReplacementName(I, true, o, 2));
+ BV2->insertBefore(J);
+ return BV2;
+ }
+
+ // This function creates an array of values that will be used as the inputs
+ // to the vector instruction that fuses I with J.
+ void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
+ Instruction *I, Instruction *J,
+ SmallVector<Value *, 3> &ReplacedOperands,
+ bool &FlipMemInputs) {
+ FlipMemInputs = false;
+ unsigned NumOperands = I->getNumOperands();
+
+ for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
+ // Iterate backward so that we look at the store pointer
+ // first and know whether or not we need to flip the inputs.
+
+ if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
+ // This is the pointer for a load/store instruction.
+ ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o,
+ FlipMemInputs);
+ continue;
+ } else if (isa<CallInst>(I)) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ unsigned IID = F->getIntrinsicID();
+ if (o == NumOperands-1) {
+ BasicBlock &BB = *I->getParent();
+
+ Module *M = BB.getParent()->getParent();
+ Type *ArgType = I->getType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+
+ // FIXME: is it safe to do this here?
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M,
+ (Intrinsic::ID) IID, VArgType);
+ continue;
+ } else if (IID == Intrinsic::powi && o == 1) {
+ // The second argument of powi is a single integer and we've already
+ // checked that both arguments are equal. As a result, we just keep
+ // I's second argument.
+ ReplacedOperands[o] = I->getOperand(o);
+ continue;
+ }
+ } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
+ ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
+ continue;
+ }
+
+ ReplacedOperands[o] =
+ getReplacementInput(Context, I, J, o, FlipMemInputs);
+ }
+ }
+
+ // This function creates two values that represent the outputs of the
+ // original I and J instructions. These are generally vector shuffles
+ // or extracts. In many cases, these will end up being unused and, thus,
+ // eliminated by later passes.
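+ // For example, when a pair of scalar (non-store) instructions is fused
+ // into K, the two replacement outputs are simply "extractelement K, 0" and
+ // "extractelement K, 1" (swapped when FlipMemInputs is set).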
+ void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt,
+ Instruction *&K1, Instruction *&K2,
+ bool &FlipMemInputs) {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ if (isa<StoreInst>(I)) {
+ AA->replaceWithNewValue(I, K);
+ AA->replaceWithNewValue(J, K);
+ } else {
+ Type *IType = I->getType();
+ Type *VType = getVecTypeForPair(IType);
+
+ if (IType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(IType)->getNumElements();
+ std::vector<Constant*> Mask1(numElem), Mask2(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v);
+ }
+
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask2 : Mask1),
+ getReplacementName(K, false, 1));
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask1 : Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0,
+ getReplacementName(K, false, 1));
+ K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1,
+ getReplacementName(K, false, 2));
+ }
+
+ K1->insertAfter(K);
+ K2->insertAfter(K1);
+ InsertionPt = K2;
+ }
+ }
+
+  // Determine whether all uses of the instruction I (including
+  // pairing-induced uses) can be moved after J.
+ bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J; ++L)
+ (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet);
+
+ assert(cast<Instruction>(L) == J &&
+ "Tracking has not proceeded far enough to check for dependencies");
+ // If J is now in the use set of I, then trackUsesOfI will return true
+ // and we have a dependency cycle (and the fusing operation must abort).
+ return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet);
+ }
+
+  // Move all uses of the instruction I (including pairing-induced uses)
+  // after J.
+ void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J;) {
+ if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) {
+ // Move this instruction
+ Instruction *InstToMove = L; ++L;
+
+ DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
+ " to after " << *InsertionPt << "\n");
+ InstToMove->removeFromParent();
+ InstToMove->insertAfter(InsertionPt);
+ InsertionPt = InstToMove;
+ } else {
+ ++L;
+ }
+ }
+ }
+
+  // Collect all load instructions that are in the move set of a given first
+ // pair member. These loads depend on the first instruction, I, and so need
+ // to be moved after J (the second instruction) when the pair is fused.
+ void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+
+ // Note: We cannot end the loop when we reach J because J could be moved
+ // farther down the use chain by another instruction pairing. Also, J
+ // could be before I if this is an inverted input.
+ for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
+ if (trackUsesOfI(Users, WriteSet, I, L)) {
+ if (L->mayReadFromMemory())
+ LoadMoveSet.insert(ValuePair(L, I));
+ }
+ }
+ }
+
+ // In cases where both load/stores and the computation of their pointers
+ // are chosen for vectorization, we can end up in a situation where the
+ // aliasing analysis starts returning different query results as the
+ // process of fusing instruction pairs continues. Because the algorithm
+ // relies on finding the same use trees here as were found earlier, we'll
+ // need to precompute the necessary aliasing information here and then
+ // manually update it during the fusion process.
+ void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PIE = PairableInsts.end(); PI != PIE; ++PI) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
+ if (P == ChosenPairs.end()) continue;
+
+ Instruction *I = cast<Instruction>(P->first);
+ collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I);
+ }
+ }
+
+ // This function fuses the chosen instruction pairs into vector instructions,
+  // taking care to preserve any needed scalar outputs, and then reorders the
+ // remaining instructions as needed (users of the first member of the pair
+ // need to be moved to after the location of the second member of the pair
+ // because the vector instruction is inserted in the location of the pair's
+ // second member).
+ void BBVectorize::fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs) {
+ LLVMContext& Context = BB.getContext();
+
+ // During the vectorization process, the order of the pairs to be fused
+ // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
+ // list. After a pair is fused, the flipped pair is removed from the list.
+ std::vector<ValuePair> FlippedPairs;
+ FlippedPairs.reserve(ChosenPairs.size());
+ for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
+ E = ChosenPairs.end(); P != E; ++P)
+ FlippedPairs.push_back(ValuePair(P->second, P->first));
+ for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(),
+ E = FlippedPairs.end(); P != E; ++P)
+ ChosenPairs.insert(*P);
+
+ std::multimap<Value *, Value *> LoadMoveSet;
+ collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+
+ DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
+
+ for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
+ if (P == ChosenPairs.end()) {
+ ++PI;
+ continue;
+ }
+
+ if (getDepthFactor(P->first) == 0) {
+ // These instructions are not really fused, but are tracked as though
+ // they are. Any case in which it would be interesting to fuse them
+ // will be taken care of by InstCombine.
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ Instruction *I = cast<Instruction>(P->first),
+ *J = cast<Instruction>(P->second);
+
+ DEBUG(dbgs() << "BBV: fusing: " << *I <<
+ " <-> " << *J << "\n");
+
+ // Remove the pair and flipped pair from the list.
+ DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
+ assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
+ ChosenPairs.erase(FP);
+ ChosenPairs.erase(P);
+
+ if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) {
+ DEBUG(dbgs() << "BBV: fusion of: " << *I <<
+ " <-> " << *J <<
+ " aborted because of non-trivial dependency cycle\n");
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ bool FlipMemInputs;
+ unsigned NumOperands = I->getNumOperands();
+ SmallVector<Value *, 3> ReplacedOperands(NumOperands);
+ getReplacementInputsForPair(Context, I, J, ReplacedOperands,
+ FlipMemInputs);
+
+ // Make a copy of the original operation, change its type to the vector
+ // type and replace its operands with the vector operands.
+ Instruction *K = I->clone();
+ if (I->hasName()) K->takeName(I);
+
+ if (!isa<StoreInst>(K))
+ K->mutateType(getVecTypeForPair(I->getType()));
+
+ for (unsigned o = 0; o < NumOperands; ++o)
+ K->setOperand(o, ReplacedOperands[o]);
+
+ // If we've flipped the memory inputs, make sure that we take the correct
+ // alignment.
+ if (FlipMemInputs) {
+ if (isa<StoreInst>(K))
+ cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment());
+ else
+ cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment());
+ }
+
+ K->insertAfter(J);
+
+ // Instruction insertion point:
+ Instruction *InsertionPt = K;
+ Instruction *K1 = 0, *K2 = 0;
+ replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2,
+ FlipMemInputs);
+
+ // The use tree of the first original instruction must be moved to after
+ // the location of the second instruction. The entire use tree of the
+ // first instruction is disjoint from the input tree of the second
+ // (by definition), and so commutes with it.
+
+ moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
+
+ if (!isa<StoreInst>(I)) {
+ I->replaceAllUsesWith(K1);
+ J->replaceAllUsesWith(K2);
+ AA->replaceWithNewValue(I, K1);
+ AA->replaceWithNewValue(J, K2);
+ }
+
+ // Instructions that may read from memory may be in the load move set.
+ // Once an instruction is fused, we no longer need its move set, and so
+ // the values of the map never need to be updated. However, when a load
+ // is fused, we need to merge the entries from both instructions in the
+ // pair in case those instructions were in the move set of some other
+ // yet-to-be-fused pair. The loads in question are the keys of the map.
+ if (I->mayReadFromMemory()) {
+ std::vector<ValuePair> NewSetMembers;
+ VPIteratorPair IPairRange = LoadMoveSet.equal_range(I);
+ VPIteratorPair JPairRange = LoadMoveSet.equal_range(J);
+ for (std::multimap<Value *, Value *>::iterator N = IPairRange.first;
+ N != IPairRange.second; ++N)
+ NewSetMembers.push_back(ValuePair(K, N->second));
+ for (std::multimap<Value *, Value *>::iterator N = JPairRange.first;
+ N != JPairRange.second; ++N)
+ NewSetMembers.push_back(ValuePair(K, N->second));
+ for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
+ AE = NewSetMembers.end(); A != AE; ++A)
+ LoadMoveSet.insert(*A);
+ }
+
+ // Before removing I, set the iterator to the next instruction.
+ PI = llvm::next(BasicBlock::iterator(I));
+ if (cast<Instruction>(PI) == J)
+ ++PI;
+
+ SE->forgetValue(I);
+ SE->forgetValue(J);
+ I->eraseFromParent();
+ J->eraseFromParent();
+ }
+
+ DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
+ }
+}
+
+char BBVectorize::ID = 0;
+static const char bb_vectorize_name[] = "Basic-Block Vectorization";
+INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+ return new BBVectorize(C);
+}
+
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+ BBVectorize BBVectorizer(P, C);
+ return BBVectorizer.vectorizeBB(BB);
+}
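
As a usage note for the two entry points above: the new pass can be scheduled like any
other through the legacy pass manager. A minimal sketch, assuming the 3.1-era header
locations and that the default alias-analysis implementation is acceptable for the code
being compiled (runBBVectorize is a hypothetical helper, not part of this patch):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Vectorize.h"

    // Run bb-vectorize over every basic block in M with the default settings.
    void runBBVectorize(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createBBVectorizePass(llvm::VectorizeConfig()));
      PM.run(M);
    }
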
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+ VectorBits = ::VectorBits;
+ VectorizeInts = !::NoInts;
+ VectorizeFloats = !::NoFloats;
+ VectorizeCasts = !::NoCasts;
+ VectorizeMath = !::NoMath;
+ VectorizeFMA = !::NoFMA;
+ VectorizeMemOps = !::NoMemOps;
+ AlignedOnly = ::AlignedOnly;
+ ReqChainDepth= ::ReqChainDepth;
+ SearchLimit = ::SearchLimit;
+ MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+ SplatBreaksChain = ::SplatBreaksChain;
+ MaxInsts = ::MaxInsts;
+ MaxIter = ::MaxIter;
+ NoMemOpBoost = ::NoMemOpBoost;
+ FastDep = ::FastDep;
+}
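
The constructor above simply copies the command-line defaults, so a caller that wants
different behaviour can build a VectorizeConfig, override individual fields, and hand it
to vectorizeBasicBlock. A small sketch, assuming the caller already has a Pass with
AliasAnalysis and ScalarEvolution available; the helper name and chosen values are
illustrative only:

    #include "llvm/BasicBlock.h"
    #include "llvm/Pass.h"
    #include "llvm/Transforms/Vectorize.h"

    // Vectorize a single block, but only touch aligned memory operations and
    // demand deeper instruction chains than the default before fusing.
    bool vectorizeConservatively(llvm::Pass *P, llvm::BasicBlock &BB) {
      llvm::VectorizeConfig C;   // starts from the command-line defaults
      C.AlignedOnly = true;
      C.ReqChainDepth = 8;
      return llvm::vectorizeBasicBlock(P, BB, C);
    }
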
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
new file mode 100644
index 000000000000..4b6693015ce9
--- /dev/null
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMVectorize
+ BBVectorize.cpp
+ Vectorize.cpp
+ )
diff --git a/lib/Transforms/Vectorize/LLVMBuild.txt b/lib/Transforms/Vectorize/LLVMBuild.txt
new file mode 100644
index 000000000000..7167d273ae50
--- /dev/null
+++ b/lib/Transforms/Vectorize/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Transforms/Vectorize/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Vectorize
+parent = Transforms
+library_name = Vectorize
+required_libraries = Analysis Core InstCombine Support Target TransformUtils
+
diff --git a/lib/Target/CBackend/Makefile b/lib/Transforms/Vectorize/Makefile
index 621948a9f4ac..86c36585f23f 100644
--- a/lib/Target/CBackend/Makefile
+++ b/lib/Transforms/Vectorize/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===##
+##===- lib/Transforms/Vectorize/Makefile -----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,9 +8,8 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMCBackend
-DIRS = TargetInfo
+LIBRARYNAME = LLVMVectorize
+BUILD_ARCHIVE = 1
include $(LEVEL)/Makefile.common
-CompileCommonOpts += -Wno-format
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
new file mode 100644
index 000000000000..1ef60029bcf4
--- /dev/null
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -0,0 +1,39 @@
+//===-- Vectorize.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMVectorize.a, which
+// implements several vectorization transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Vectorize.h"
+
+using namespace llvm;
+
+/// initializeVectorization - Initialize all passes linked into the
+/// Vectorization library.
+void llvm::initializeVectorization(PassRegistry &Registry) {
+ initializeBBVectorizePass(Registry);
+}
+
+void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
+ initializeVectorization(*unwrap(R));
+}
+
+void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBBVectorizePass());
+}
+
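
The C binding defined above makes the pass reachable from non-C++ clients as well. A
minimal sketch of its use (the wrapper function name here is hypothetical):

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Vectorize.h"

    // Schedule bb-vectorize, with its default options, on an existing
    // pass manager created through the C API.
    void AddVectorizePass(LLVMPassManagerRef PM) {
      LLVMAddBBVectorizePass(PM);
    }
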
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 18308f27cfe4..7b39efb7c7a0 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -89,7 +89,6 @@ enum PrefixType {
static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
assert(!Name.empty() && "Cannot get empty name!");
switch (Prefix) {
- default: llvm_unreachable("Bad prefix!");
case NoPrefix: break;
case GlobalPrefix: OS << '@'; break;
case LabelPrefix: break;
@@ -189,6 +188,7 @@ void TypePrinting::incorporateTypes(const Module &M) {
void TypePrinting::print(Type *Ty, raw_ostream &OS) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: OS << "void"; break;
+ case Type::HalfTyID: OS << "half"; break;
case Type::FloatTyID: OS << "float"; break;
case Type::DoubleTyID: OS << "double"; break;
case Type::X86_FP80TyID: OS << "x86_fp80"; break;
@@ -231,7 +231,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
if (I != NumberedTypes.end())
OS << '%' << I->second;
else // Not enumerated, print the hex address.
- OS << "%\"type 0x" << STy << '\"';
+ OS << "%\"type " << STy << '\"';
return;
}
case Type::PointerTyID: {
@@ -708,31 +708,37 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
- &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
// the value back and get the same value.
//
bool ignored;
+ bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
- double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
- CFP->getValueAPF().convertToFloat();
- SmallString<128> StrVal;
- raw_svector_ostream(StrVal) << Val;
-
- // Check to make sure that the stringized number is not some string like
- // "Inf" or NaN, that atof will accept, but the lexer will not. Check
- // that the string matches the "[-+]?[0-9]" regex.
- //
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
- // Reparse stringized version!
- if (atof(StrVal.c_str()) == Val) {
- Out << StrVal.str();
- return;
+ bool isInf = CFP->getValueAPF().isInfinity();
+ bool isNaN = CFP->getValueAPF().isNaN();
+ if (!isHalf && !isInf && !isNaN) {
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+      // Check to make sure that the stringized number is not some string like
+      // "Inf" or "NaN" that the lexer will not accept even though it can be
+      // parsed back as a double.  Check that the string matches the
+      // "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
+ Out << StrVal.str();
+ return;
+ }
}
}
// Otherwise we could not reparse it to exactly the same value, so we must
@@ -743,7 +749,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
"assuming that double is 64 bits!");
char Buffer[40];
APFloat apf = CFP->getValueAPF();
- // Floats are represented in ASCII IR as double, convert.
+ // Halves and floats are represented in ASCII IR as double, convert.
if (!isDouble)
apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
@@ -823,35 +829,53 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ']';
+ return;
+ }
+
+ if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
// As a special case, print the array as a string if it is an array of
// i8 with ConstantInt values.
- //
- Type *ETy = CA->getType()->getElementType();
if (CA->isString()) {
Out << "c\"";
PrintEscapedString(CA->getAsString(), Out);
Out << '"';
- } else { // Cannot output in string format...
- Out << '[';
- if (CA->getNumOperands()) {
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
- Out << ", ";
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
- Context);
- }
- }
- Out << ']';
+ return;
}
+
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << ']';
return;
}
+
if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
if (CS->getType()->isPacked())
Out << '<';
@@ -882,21 +906,19 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- Type *ETy = CP->getType()->getElementType();
- assert(CP->getNumOperands() > 0 &&
- "Number of operands for a PackedConst must be > 0");
+ if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
+ Type *ETy = CV->getType()->getVectorElementType();
Out << '<';
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
+ Machine, Context);
+ for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
+ Machine, Context);
}
Out << '>';
return;
@@ -1162,7 +1184,6 @@ void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
return;
switch (SynchScope) {
- default: Out << " <bad scope " << int(SynchScope) << ">"; break;
case SingleThread: Out << " singlethread"; break;
case CrossThread: break;
}
@@ -1710,13 +1731,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeOperand(SI.getDefaultDest(), true);
Out << " [";
- // Skip the first item since that's the default case.
- unsigned NumCases = SI.getNumCases();
- for (unsigned i = 1; i < NumCases; ++i) {
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
Out << "\n ";
- writeOperand(SI.getCaseValue(i), true);
+ writeOperand(i.getCaseValue(), true);
Out << ", ";
- writeOperand(SI.getSuccessor(i), true);
+ writeOperand(i.getCaseSuccessor(), true);
}
Out << "\n ]";
} else if (isa<IndirectBrInst>(I)) {
@@ -1988,7 +2008,7 @@ static void WriteMDNodeComment(const MDNode *Node,
if (!CI) return;
APInt Val = CI->getValue();
APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
- if (Val.ult(LLVMDebugVersion))
+ if (Val.ult(LLVMDebugVersion11))
return;
Out.PadToColumn(50);
@@ -2110,3 +2130,6 @@ void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
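
Regarding the floating-point printing hunk earlier in this file: half constants,
infinities and NaNs now skip the decimal path entirely, and the reparse test goes
through APFloat rather than atof. The underlying idea is a round-trip check: print a
short decimal form and keep it only if it reads back to exactly the same value. A
standalone sketch of that idea, assuming plain C library formatting rather than the
exact raw_ostream format LLVM uses:

    #include <stdio.h>
    #include <stdlib.h>

    // Does the short decimal form of Val read back to exactly the same
    // double?  If not, a writer must fall back to an exact (hex) encoding.
    bool printsExactly(double Val) {
      char Buf[64];
      snprintf(Buf, sizeof(Buf), "%g", Val);
      return strtod(Buf, 0) == Val;
    }
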
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 485be75d1b12..c05132be5a44 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -76,6 +76,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "naked ";
if (Attrs & Attribute::NonLazyBind)
Result += "nonlazybind ";
+ if (Attrs & Attribute::AddressSafety)
+ Result += "address_safety ";
if (Attrs & Attribute::StackAlignment) {
Result += "alignstack(";
Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
@@ -152,8 +154,10 @@ public:
}
static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr,
unsigned NumAttrs) {
- for (unsigned i = 0; i != NumAttrs; ++i)
- ID.AddInteger(uint64_t(Attr[i].Attrs) << 32 | unsigned(Attr[i].Index));
+ for (unsigned i = 0; i != NumAttrs; ++i) {
+ ID.AddInteger(Attr[i].Attrs.Raw());
+ ID.AddInteger(Attr[i].Index);
+ }
}
};
}
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index b849d3ef8dc2..ea3d4ba22436 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -38,105 +38,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return false;
Name = Name.substr(5); // Strip off "llvm."
- FunctionType *FTy = F->getFunctionType();
- Module *M = F->getParent();
-
switch (Name[0]) {
default: break;
- case 'a':
- if (Name.startswith("atomic.cmp.swap") ||
- Name.startswith("atomic.swap") ||
- Name.startswith("atomic.load.add") ||
- Name.startswith("atomic.load.sub") ||
- Name.startswith("atomic.load.and") ||
- Name.startswith("atomic.load.nand") ||
- Name.startswith("atomic.load.or") ||
- Name.startswith("atomic.load.xor") ||
- Name.startswith("atomic.load.max") ||
- Name.startswith("atomic.load.min") ||
- Name.startswith("atomic.load.umax") ||
- Name.startswith("atomic.load.umin"))
- return true;
- case 'i':
- // This upgrades the old llvm.init.trampoline to the new
- // llvm.init.trampoline and llvm.adjust.trampoline pair.
- if (Name == "init.trampoline") {
- // The new llvm.init.trampoline returns nothing.
- if (FTy->getReturnType()->isVoidTy())
- break;
-
- assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
-
- // Change the name of the old intrinsic so that we can play with its type.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(
- NameTmp,
- Type::getVoidTy(M->getContext()),
- FTy->getParamType(0), FTy->getParamType(1),
- FTy->getParamType(2), (Type *)0));
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
return true;
}
- case 'm':
- if (Name == "memory.barrier")
- return true;
- case 'p':
- // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
- // which is a instruction / data cache identifier. The old version only
- // implicitly accepted the data version.
- if (Name == "prefetch") {
- // Don't do anything if it has the correct number of arguments already
- if (FTy->getNumParams() == 4)
- break;
-
- assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
- FTy->getReturnType(),
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(2),
- (Type*)0));
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
return true;
}
-
break;
+ }
case 'x': {
- const char *NewFnName = NULL;
- // This fixes the poorly named crc32 intrinsics.
- if (Name == "x86.sse42.crc32.8")
- NewFnName = "llvm.x86.sse42.crc32.32.8";
- else if (Name == "x86.sse42.crc32.16")
- NewFnName = "llvm.x86.sse42.crc32.32.16";
- else if (Name == "x86.sse42.crc32.32")
- NewFnName = "llvm.x86.sse42.crc32.32.32";
- else if (Name == "x86.sse42.crc64.8")
- NewFnName = "llvm.x86.sse42.crc32.64.8";
- else if (Name == "x86.sse42.crc64.64")
- NewFnName = "llvm.x86.sse42.crc32.64.64";
-
- if (NewFnName) {
- F->setName(NewFnName);
- NewFn = F;
+ if (Name.startswith("x86.sse2.pcmpeq.") ||
+ Name.startswith("x86.sse2.pcmpgt.") ||
+ Name.startswith("x86.avx2.pcmpeq.") ||
+ Name.startswith("x86.avx2.pcmpgt.")) {
+ NewFn = 0;
return true;
}
-
- // Calls to these instructions are transformed into unaligned loads.
- if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
- Name == "x86.sse2.loadu.pd")
- return true;
-
- // Calls to these instructions are transformed into nontemporal stores.
- if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
- Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
- return true;
-
break;
}
}
@@ -171,188 +97,52 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
- ImmutableCallSite CS(CI);
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
- assert(F && "CallInst has no function associated with it.");
+ assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
- if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
- // Convert to a native, unaligned load.
- Type *VecTy = CI->getType();
- Type *IntTy = IntegerType::get(C, 128);
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
- PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
- LI->setAlignment(1); // Unaligned load.
- BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
- // Fix up all the uses with our new load.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(BC);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
- F->getName() == "llvm.x86.sse2.movnt.dq" ||
- F->getName() == "llvm.x86.sse2.movnt.pd" ||
- F->getName() == "llvm.x86.sse2.movnt.i") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Module *M = F->getParent();
- SmallVector<Value *, 1> Elts;
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- MDNode *Node = MDNode::get(C, Elts);
-
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
-
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(Arg0,
- PointerType::getUnqual(Arg1->getType()),
- "cast");
- StoreInst *SI = Builder.CreateStore(Arg1, BC);
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- AtomicRMWInst::BinOp Op;
- if (F->getName().startswith("llvm.atomic.swap"))
- Op = AtomicRMWInst::Xchg;
- else if (F->getName().startswith("llvm.atomic.load.add"))
- Op = AtomicRMWInst::Add;
- else if (F->getName().startswith("llvm.atomic.load.sub"))
- Op = AtomicRMWInst::Sub;
- else if (F->getName().startswith("llvm.atomic.load.and"))
- Op = AtomicRMWInst::And;
- else if (F->getName().startswith("llvm.atomic.load.nand"))
- Op = AtomicRMWInst::Nand;
- else if (F->getName().startswith("llvm.atomic.load.or"))
- Op = AtomicRMWInst::Or;
- else if (F->getName().startswith("llvm.atomic.load.xor"))
- Op = AtomicRMWInst::Xor;
- else if (F->getName().startswith("llvm.atomic.load.max"))
- Op = AtomicRMWInst::Max;
- else if (F->getName().startswith("llvm.atomic.load.min"))
- Op = AtomicRMWInst::Min;
- else if (F->getName().startswith("llvm.atomic.load.umax"))
- Op = AtomicRMWInst::UMax;
- else if (F->getName().startswith("llvm.atomic.load.umin"))
- Op = AtomicRMWInst::UMin;
- else
- llvm_unreachable("Unknown atomic");
-
- Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
- CI->getArgOperand(1),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.memory.barrier") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // Note that this conversion ignores the "device" bit; it was not really
- // well-defined, and got abused because nobody paid enough attention to
- // get it right. In practice, this probably doesn't matter; application
- // code generally doesn't need anything stronger than
- // SequentiallyConsistent (and realistically, SequentiallyConsistent
- // is lowered to a strong enough barrier for almost anything).
-
- if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
- Builder.CreateFence(SequentiallyConsistent);
- else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
- Builder.CreateFence(Release);
- else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
- Builder.CreateFence(Acquire);
- else
- Builder.CreateFence(AcquireRelease);
-
- // Remove intrinsic.
- CI->eraseFromParent();
+ // Get the Function's name.
+ StringRef Name = F->getName();
+
+ Value *Rep;
+    // Upgrade packed integer vector compare intrinsics to compare instructions.
+ if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
+ Name.startswith("llvm.x86.avx2.pcmpeq.")) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
+ Name.startswith("llvm.x86.avx2.pcmpgt.")) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
- return;
- }
- switch (NewFn->getIntrinsicID()) {
- case Intrinsic::prefetch: {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
-
- // Add the extra "data cache" argument
- Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2),
- llvm::ConstantInt::get(I32Ty, 1) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands,
- CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
+ CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
- break;
+ return;
}
- case Intrinsic::init_trampoline: {
-
- // Transform
- // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
- // to
- // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
- // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
-
- Function *AdjustTrampolineFn =
- cast<Function>(Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::adjust_trampoline));
-
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI);
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2));
-
- CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
- CI->getArgOperand(0),
- CI->getName());
- if (!CI->use_empty())
- CI->replaceAllUsesWith(AdjustCall);
+ switch (NewFn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+ StringRef Name = CI->getName();
+ CI->setName(Name + ".old");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
CI->eraseFromParent();
- break;
- }
+ return;
}
}
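
The ctlz/cttz upgrade above reflects the new intrinsic signatures: both now take a
second i1 operand (is_zero_undef in the language reference), and upgraded calls pass
false so a zero input keeps its old, defined result. A sketch of emitting the new form
directly, assuming the 3.1-era header locations; emitCtlz is an illustrative helper,
not part of this patch:

    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Build "call i32 @llvm.ctlz.i32(i32 %x, i1 false)" at B's insert point.
    Value *emitCtlz(Module *M, IRBuilder<> &B, Value *X) {
      Type *Ty = X->getType();
      Function *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Ty);
      return B.CreateCall2(Ctlz, X, B.getFalse());
    }
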
@@ -378,291 +168,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
-/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
-/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
-/// strips that use.
-void llvm::CheckDebugInfoIntrinsics(Module *M) {
- if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- while (!FuncStart->use_empty())
- cast<CallInst>(FuncStart->use_back())->eraseFromParent();
- FuncStart->eraseFromParent();
- }
-
- if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- while (!StopPoint->use_empty())
- cast<CallInst>(StopPoint->use_back())->eraseFromParent();
- StopPoint->eraseFromParent();
- }
-
- if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- while (!RegionStart->use_empty())
- cast<CallInst>(RegionStart->use_back())->eraseFromParent();
- RegionStart->eraseFromParent();
- }
-
- if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- while (!RegionEnd->use_empty())
- cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
- RegionEnd->eraseFromParent();
- }
-
- if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
- if (!Declare->use_empty()) {
- DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getArgOperand(0)) ||
- !isa<MDNode>(DDI->getArgOperand(1))) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- }
- }
- }
-}
-
-/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic
-/// calls reachable from the unwind basic block.
-static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn,
- CallInst *&Sel,
- SmallPtrSet<BasicBlock*, 8> &Visited) {
- if (!Visited.insert(BB)) return;
-
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- switch (CI->getCalledFunction()->getIntrinsicID()) {
- default: break;
- case Intrinsic::eh_exception:
- assert(!Exn && "Found more than one eh.exception call!");
- Exn = CI;
- break;
- case Intrinsic::eh_selector:
- assert(!Sel && "Found more than one eh.selector call!");
- Sel = CI;
- break;
- }
-
- if (Exn && Sel) return;
- }
- }
-
- if (Exn && Sel) return;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- FindExnAndSelIntrinsics(*I, Exn, Sel, Visited);
- if (Exn && Sel) return;
- }
-}
-
-/// TransferClausesToLandingPadInst - Transfer the exception handling clauses
-/// from the eh_selector call to the new landingpad instruction.
-static void TransferClausesToLandingPadInst(LandingPadInst *LPI,
- CallInst *EHSel) {
- LLVMContext &Context = LPI->getContext();
- unsigned N = EHSel->getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N)
- for (unsigned j = FirstCatch; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-
- if (!FilterLength) {
- // Cleanup.
- LPI->setCleanup(true);
- } else {
- // Filter.
- SmallVector<Constant *, 4> TyInfo;
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j)));
- ArrayType *AType =
- ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() :
- PointerType::getUnqual(Type::getInt8Ty(Context)),
- TyInfo.size());
- LPI->addClause(ConstantArray::get(AType, TyInfo));
- }
-
- N = i;
- }
- }
-
- if (N > 2)
- for (unsigned j = 2; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-}
-
-/// This function upgrades the old pre-3.0 exception handling system to the new
-/// one. N.B. This will be removed in 3.1.
-void llvm::UpgradeExceptionHandling(Module *M) {
- Function *EHException = M->getFunction("llvm.eh.exception");
- Function *EHSelector = M->getFunction("llvm.eh.selector");
- if (!EHException || !EHSelector)
- return;
-
- LLVMContext &Context = M->getContext();
- Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context));
- Type *SelTy = Type::getInt32Ty(Context);
- Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL);
-
- // This map links the invoke instruction with the eh.exception and eh.selector
- // calls associated with it.
- DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap;
- for (Module::iterator
- I = M->begin(), E = M->end(); I != E; ++I) {
- Function &F = *I;
-
- for (Function::iterator
- II = F.begin(), IE = F.end(); II != IE; ++II) {
- BasicBlock *BB = &*II;
- InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!Inst) continue;
- BasicBlock *UnwindDest = Inst->getUnwindDest();
- if (UnwindDest->isLandingPad()) continue; // Already converted.
-
- SmallPtrSet<BasicBlock*, 8> Visited;
- CallInst *Exn = 0;
- CallInst *Sel = 0;
- FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited);
- assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!");
- InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel);
- }
- }
-
- // This map stores the slots where the exception object and selector value are
- // stored within a function.
- DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap;
- SmallPtrSet<Instruction*, 32> DeadInsts;
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- InvokeInst *Invoke = I->first;
- BasicBlock *UnwindDest = Invoke->getUnwindDest();
- Function *F = UnwindDest->getParent();
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
-
- // Store the exception object and selector value in the entry block.
- Value *ExnSlot = 0;
- Value *SelSlot = 0;
- if (!FnToLPadSlotMap[F].first) {
- BasicBlock *Entry = &F->front();
- ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator());
- SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator());
- FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot);
- } else {
- ExnSlot = FnToLPadSlotMap[F].first;
- SelSlot = FnToLPadSlotMap[F].second;
- }
-
- if (!UnwindDest->getSinglePredecessor()) {
- // The unwind destination doesn't have a single predecessor. Create an
- // unwind destination which has only one predecessor.
- BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad",
- UnwindDest->getParent());
- BranchInst::Create(UnwindDest, NewBB);
- Invoke->setUnwindDest(NewBB);
-
- // Fix up any PHIs in the original unwind destination block.
- for (BasicBlock::iterator
- II = UnwindDest->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- int Idx = PN->getBasicBlockIndex(Invoke->getParent());
- if (Idx == -1) continue;
- PN->setIncomingBlock(Idx, NewBB);
- }
-
- UnwindDest = NewBB;
- }
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt());
-
- Value *PersFn = Sel->getArgOperand(1);
- LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0);
- Value *LPExn = Builder.CreateExtractValue(LPI, 0);
- Value *LPSel = Builder.CreateExtractValue(LPI, 1);
- Builder.CreateStore(LPExn, ExnSlot);
- Builder.CreateStore(LPSel, SelSlot);
-
- TransferClausesToLandingPadInst(LPI, Sel);
-
- DeadInsts.insert(Exn);
- DeadInsts.insert(Sel);
- }
-
- // Replace the old intrinsic calls with the values from the landingpad
- // instruction(s). These values were stored in allocas for us to use here.
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
- BasicBlock *Parent = Exn->getParent();
-
- std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()];
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Parent, Exn);
- LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load");
- LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load");
-
- Exn->replaceAllUsesWith(LPExn);
- Sel->replaceAllUsesWith(LPSel);
- }
-
- // Remove the dead instructions.
- for (SmallPtrSet<Instruction*, 32>::iterator
- I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) {
- Instruction *Inst = *I;
- Inst->eraseFromParent();
- }
-
- // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the
- // exception and selector values from the stored place.
- Function *EHResume = M->getFunction("llvm.eh.resume");
- if (!EHResume) return;
-
- while (!EHResume->use_empty()) {
- CallInst *Resume = cast<CallInst>(EHResume->use_back());
- BasicBlock *BB = Resume->getParent();
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(BB, Resume);
-
- Value *LPadVal =
- Builder.CreateInsertValue(UndefValue::get(LPadSlotTy),
- Resume->getArgOperand(0), 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1),
- 1, "lpad.val");
- Builder.CreateResume(LPadVal);
-
- // Remove all instructions after the 'resume.'
- BasicBlock::iterator I = Resume;
- while (I != BB->end()) {
- Instruction *Inst = &*I++;
- Inst->eraseFromParent();
- }
- }
-}
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index d0aa275c8fe2..d353b0adcff7 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -366,3 +366,6 @@ bool BasicBlock::isLandingPad() const {
LandingPadInst *BasicBlock::getLandingPadInst() {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
+const LandingPadInst *BasicBlock::getLandingPadInst() const {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 0404297500c0..e1efcdadc711 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -8,7 +8,6 @@ add_llvm_library(LLVMCore
ConstantFold.cpp
Constants.cpp
Core.cpp
- DebugInfoProbe.cpp
DebugLoc.cpp
Dominators.cpp
Function.cpp
@@ -37,5 +36,3 @@ add_llvm_library(LLVMCore
ValueTypes.cpp
Verifier.cpp
)
-
-add_llvm_library_dependencies(LLVMCore LLVMSupport)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 30bae7162cea..b743287adf39 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -38,11 +38,10 @@ using namespace llvm;
// ConstantFold*Instruction Implementations
//===----------------------------------------------------------------------===//
-/// BitCastConstantVector - Convert the specified ConstantVector node to the
+/// BitCastConstantVector - Convert the specified vector Constant node to the
/// specified vector type. At this point, we know that the elements of the
/// input vector constant are all simple integer or FP values.
-static Constant *BitCastConstantVector(ConstantVector *CV,
- VectorType *DstTy) {
+static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
if (CV->isNullValue()) return Constant::getNullValue(DstTy);
@@ -51,22 +50,21 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
// doing so requires endianness information. This should be handled by
// Analysis/ConstantFolding.cpp
unsigned NumElts = DstTy->getNumElements();
- if (NumElts != CV->getNumOperands())
+ if (NumElts != CV->getType()->getVectorNumElements())
return 0;
+
+ Type *DstEltTy = DstTy->getElementType();
// Check to verify that all elements of the input are simple.
+ SmallVector<Constant*, 16> Result;
for (unsigned i = 0; i != NumElts; ++i) {
- if (!isa<ConstantInt>(CV->getOperand(i)) &&
- !isa<ConstantFP>(CV->getOperand(i)))
- return 0;
+ Constant *C = CV->getAggregateElement(i);
+ if (C == 0) return 0;
+ C = ConstantExpr::getBitCast(C, DstEltTy);
+ if (isa<ConstantExpr>(C)) return 0;
+ Result.push_back(C);
}
- // Bitcast each element now.
- std::vector<Constant*> Result;
- Type *DstEltTy = DstTy->getElementType();
- for (unsigned i = 0; i != NumElts; ++i)
- Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
- DstEltTy));
return ConstantVector::get(Result);
}
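
Much of this file's rewrite replaces type-specific element access (ConstantVector's
getOperand, the old GetVectorElement helper) with the uniform
Constant::getAggregateElement interface, which also covers the new ConstantDataVector
class as well as ConstantAggregateZero and UndefValue. A small sketch of that access
pattern, assuming the pre-3.3 header layout; getVectorElements is an illustrative
helper, not part of this patch:

    #include "llvm/Constants.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Collect the elements of any vector-typed constant; returns false if
    // some element cannot be produced as a Constant (e.g. an out-of-range
    // index or a non-aggregate value).
    static bool getVectorElements(Constant *V,
                                  SmallVectorImpl<Constant*> &Elts) {
      unsigned N = V->getType()->getVectorNumElements();
      for (unsigned i = 0; i != N; ++i) {
        Constant *C = V->getAggregateElement(i);
        if (C == 0) return false;
        Elts.push_back(C);
      }
      return true;
    }
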
@@ -104,7 +102,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
// the first element. If so, return the appropriate GEP instruction.
if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
- if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()
+ && DPTy->getElementType()->isSized()) {
SmallVector<Value*, 8> IdxList;
Value *Zero =
Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
@@ -141,8 +140,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(DestTy);
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- return BitCastConstantVector(CV, DestPTy);
+    // Handle ConstantVector and ConstantDataVector.
+ return BitCastConstantVector(V, DestPTy);
}
// Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
@@ -548,18 +547,17 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// If the cast operand is a constant vector, perform the cast by
// operating on each element. In the cast of bitcasts, the element
// count may be mismatched; don't attempt to handle that here.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- CV->getType()->getNumElements()) {
- std::vector<Constant*> res;
- VectorType *DestVecTy = cast<VectorType>(DestTy);
- Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc,
- CV->getOperand(i), DstEltTy));
- return ConstantVector::get(res);
- }
+ if ((isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) &&
+ DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == V->getType()->getVectorNumElements()) {
+ SmallVector<Constant*, 16> res;
+ VectorType *DestVecTy = cast<VectorType>(DestTy);
+ Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ V->getAggregateElement(i), DstEltTy));
+ return ConstantVector::get(res);
+ }
// We actually have to do a cast now. Perform the cast according to the
// opcode specified.
@@ -571,7 +569,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
bool ignored;
APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
+ DestTy->isFloatTy() ? APFloat::IEEEsingle :
DestTy->isDoubleTy() ? APFloat::IEEEdouble :
DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
DestTy->isFP128Ty() ? APFloat::IEEEquad :
@@ -690,45 +689,27 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *V1, Constant *V2) {
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
- return CB->getZExtValue() ? V1 : V2;
-
- // Check for zero aggregate and ConstantVector of zeros
+ // Check for i1 and vector true/false conditions.
if (Cond->isNullValue()) return V2;
-
- if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
-
- if (CondV->isAllOnesValue()) return V1;
-
- VectorType *VTy = cast<VectorType>(V1->getType());
- ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
- ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
-
- if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
- (CP2 || isa<ConstantAggregateZero>(V2))) {
-
- // Find the element type of the returned vector
- Type *EltTy = VTy->getElementType();
- unsigned NumElem = VTy->getNumElements();
- std::vector<Constant*> Res(NumElem);
-
- bool Valid = true;
- for (unsigned i = 0; i < NumElem; ++i) {
- ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
- if (!c) {
- Valid = false;
- break;
- }
- Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res[i] = c->getZExtValue() ? C1 : C2;
- }
- // If we were able to build the vector, return it
- if (Valid) return ConstantVector::get(Res);
+ if (Cond->isAllOnesValue()) return V1;
+
+ // If the condition is a vector constant, fold the result elementwise.
+ if (ConstantVector *CondV = dyn_cast<ConstantVector>(Cond)) {
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){
+      ConstantInt *CondElt = dyn_cast<ConstantInt>(CondV->getOperand(i));
+      if (CondElt == 0) break;
+
+      // A true condition element selects the corresponding element of V1.
+      Constant *Res =
+        (CondElt->getZExtValue() ? V1 : V2)->getAggregateElement(i);
+ if (Res == 0) break;
+ Result.push_back(Res);
}
+
+ // If we were able to build the vector, return it.
+ if (Result.size() == V1->getType()->getVectorNumElements())
+ return ConstantVector::get(Result);
}
-
if (isa<UndefValue>(Cond)) {
if (isa<UndefValue>(V1)) return V1;
return V2;
@@ -754,22 +735,19 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
Constant *Idx) {
if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
+ return UndefValue::get(Val->getType()->getVectorElementType());
if (Val->isNullValue()) // ee(zero, x) -> zero
- return Constant::getNullValue(
- cast<VectorType>(Val->getType())->getElementType());
-
- if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
- uint64_t Index = CIdx->getZExtValue();
- if (Index >= CVal->getNumOperands())
- // ee({w,x,y,z}, wrong_value) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
- return CVal->getOperand(CIdx->getZExtValue());
- } else if (isa<UndefValue>(Idx)) {
- // ee({w,x,y,z}, undef) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
- }
+ return Constant::getNullValue(Val->getType()->getVectorElementType());
+ // ee({w,x,y,z}, undef) -> undef
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Val->getType()->getVectorElementType());
+
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t Index = CIdx->getZExtValue();
+ // ee({w,x,y,z}, wrong_value) -> undef
+ if (Index >= Val->getType()->getVectorNumElements())
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ return Val->getAggregateElement(Index);
}
return 0;
}
@@ -779,103 +757,55 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
Constant *Idx) {
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return 0;
- APInt idxVal = CIdx->getValue();
- if (isa<UndefValue>(Val)) {
- // Insertion of scalar constant into vector undef
- // Optimize away insertion of undef
- if (isa<UndefValue>(Elt))
- return Val;
- // Otherwise break the aggregate undef into multiple undefs and do
- // the insertion
- unsigned numOps =
- cast<VectorType>(Val->getType())->getNumElements();
- std::vector<Constant*> Ops;
- Ops.reserve(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : UndefValue::get(Elt->getType());
- Ops.push_back(Op);
- }
- return ConstantVector::get(Ops);
- }
- if (isa<ConstantAggregateZero>(Val)) {
- // Insertion of scalar constant into vector aggregate zero
- // Optimize away insertion of zero
- if (Elt->isNullValue())
- return Val;
- // Otherwise break the aggregate zero into multiple zeros and do
- // the insertion
- unsigned numOps =
- cast<VectorType>(Val->getType())->getNumElements();
- std::vector<Constant*> Ops;
- Ops.reserve(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
- Ops.push_back(Op);
- }
- return ConstantVector::get(Ops);
- }
- if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
- // Insertion of scalar constant into vector constant
- std::vector<Constant*> Ops;
- Ops.reserve(CVal->getNumOperands());
- for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
- Ops.push_back(Op);
+ const APInt &IdxVal = CIdx->getValue();
+
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
+ if (i == IdxVal) {
+ Result.push_back(Elt);
+ continue;
}
- return ConstantVector::get(Ops);
+
+ if (Constant *C = Val->getAggregateElement(i))
+ Result.push_back(C);
+ else
+ return 0;
}
-
- return 0;
-}
-
-/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
-/// return the specified element value. Otherwise return null.
-static Constant *GetVectorElement(Constant *C, unsigned EltNo) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
- return CV->getOperand(EltNo);
-
- Type *EltTy = cast<VectorType>(C->getType())->getElementType();
- if (isa<ConstantAggregateZero>(C))
- return Constant::getNullValue(EltTy);
- if (isa<UndefValue>(C))
- return UndefValue::get(EltTy);
- return 0;
+
+ return ConstantVector::get(Result);
}
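The insertelement fold above simply rebuilds the element list with Elt spliced in at IdxVal, bailing out only when some element cannot be materialized. A small hedged example of the observable effect, under the same 3.1-era header assumptions as the other sketches in this note:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      // Inserting 7 at index 1 of a zero vector folds to <i32 0, i32 7, i32 0, i32 0>.
      Constant *Zero = ConstantAggregateZero::get(VectorType::get(I32, 4));
      Constant *Ins = ConstantExpr::getInsertElement(Zero,
                                                     ConstantInt::get(I32, 7),
                                                     ConstantInt::get(I32, 1));
      Ins->print(errs());
      errs() << "\n";
      return 0;
    }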
Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
Constant *V2,
Constant *Mask) {
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+
// Undefined shuffle mask -> undefined value.
- if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
+ if (isa<UndefValue>(Mask))
+ return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
- unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
- Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ // Don't break the bitcode reader hack.
+ if (isa<ConstantExpr>(Mask)) return 0;
+
+ unsigned SrcNumElts = V1->getType()->getVectorNumElements();
// Loop over the shuffle mask, evaluating each element.
SmallVector<Constant*, 32> Result;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- Constant *InElt = GetVectorElement(Mask, i);
- if (InElt == 0) return 0;
-
- if (isa<UndefValue>(InElt))
- InElt = UndefValue::get(EltTy);
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
- unsigned Elt = CI->getZExtValue();
- if (Elt >= SrcNumElts*2)
- InElt = UndefValue::get(EltTy);
- else if (Elt >= SrcNumElts)
- InElt = GetVectorElement(V2, Elt - SrcNumElts);
- else
- InElt = GetVectorElement(V1, Elt);
- if (InElt == 0) return 0;
- } else {
- // Unknown value.
- return 0;
+ int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
+ if (Elt == -1) {
+ Result.push_back(UndefValue::get(EltTy));
+ continue;
}
+ Constant *InElt;
+ if (unsigned(Elt) >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (unsigned(Elt) >= SrcNumElts)
+ InElt = V2->getAggregateElement(Elt - SrcNumElts);
+ else
+ InElt = V1->getAggregateElement(Elt);
+ if (InElt == 0) return 0;
Result.push_back(InElt);
}
@@ -888,26 +818,10 @@ Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
if (Idxs.empty())
return Agg;
- if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
- return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs));
-
- if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
- return
- Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs));
-
- // Otherwise recurse.
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
- return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
- Idxs.slice(1));
-
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
- return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
- Idxs.slice(1));
- ConstantVector *CV = cast<ConstantVector>(Agg);
- return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
- Idxs.slice(1));
+ if (Constant *C = Agg->getAggregateElement(Idxs[0]))
+ return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
+
+ return 0;
}
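With getAggregateElement doing the per-level lookup, the extractvalue fold above reduces to peeling one index per recursion. A sketch of that behaviour through ConstantExpr::getExtractValue; the anonymous struct layout below is just an example, not something taken from the patch:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      // Build { i32 1, { i32 2, i32 3 } } as anonymous constant structs.
      Constant *InnerElts[] = { ConstantInt::get(I32, 2), ConstantInt::get(I32, 3) };
      Constant *Inner = ConstantStruct::getAnon(InnerElts);
      Constant *OuterElts[] = { ConstantInt::get(I32, 1), Inner };
      Constant *Outer = ConstantStruct::getAnon(OuterElts);

      // extractvalue %Outer, 1, 0 folds one index at a time and yields i32 2.
      unsigned Idxs[] = { 1, 0 };
      ConstantExpr::getExtractValue(Outer, Idxs)->print(errs());
      errs() << "\n";
      return 0;
    }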
Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
@@ -917,84 +831,30 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
if (Idxs.empty())
return Val;
- if (isa<UndefValue>(Agg)) {
- // Insertion of constant into aggregate undef
- // Optimize away insertion of undef.
- if (isa<UndefValue>(Val))
- return Agg;
-
- // Otherwise break the aggregate undef into multiple undefs and do
- // the insertion.
- CompositeType *AggTy = cast<CompositeType>(Agg->getType());
- unsigned numOps;
- if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
- numOps = AR->getNumElements();
- else
- numOps = cast<StructType>(AggTy)->getNumElements();
-
- std::vector<Constant*> Ops(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Type *MemberTy = AggTy->getTypeAtIndex(i);
- Constant *Op =
- (Idxs[0] == i) ?
- ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
- Val, Idxs.slice(1)) :
- UndefValue::get(MemberTy);
- Ops[i] = Op;
- }
-
- if (StructType* ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
- }
+ unsigned NumElts;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ NumElts = ST->getNumElements();
+ else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = Agg->getType()->getVectorNumElements();
- if (isa<ConstantAggregateZero>(Agg)) {
- // Insertion of constant into aggregate zero
- // Optimize away insertion of zero.
- if (Val->isNullValue())
- return Agg;
-
- // Otherwise break the aggregate zero into multiple zeros and do
- // the insertion.
- CompositeType *AggTy = cast<CompositeType>(Agg->getType());
- unsigned numOps;
- if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
- numOps = AR->getNumElements();
- else
- numOps = cast<StructType>(AggTy)->getNumElements();
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Agg->getAggregateElement(i);
+ if (C == 0) return 0;
- std::vector<Constant*> Ops(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Type *MemberTy = AggTy->getTypeAtIndex(i);
- Constant *Op =
- (Idxs[0] == i) ?
- ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
- Val, Idxs.slice(1)) :
- Constant::getNullValue(MemberTy);
- Ops[i] = Op;
- }
+ if (Idxs[0] == i)
+ C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
- if (StructType *ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ Result.push_back(C);
}
- if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
- // Insertion of constant into aggregate constant.
- std::vector<Constant*> Ops(Agg->getNumOperands());
- for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
- Constant *Op = cast<Constant>(Agg->getOperand(i));
- if (Idxs[0] == i)
- Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
- Ops[i] = Op;
- }
-
- if (StructType* ST = dyn_cast<StructType>(Agg->getType()))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
- }
-
- return 0;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Result);
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ return ConstantArray::get(AT, Result);
+ return ConstantVector::get(Result);
}
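The insertvalue fold mirrors the extractvalue one: each level is rebuilt element by element, and getAggregateElement supplies the untouched fields even when the aggregate is undef or zero. A hedged sketch, with an arbitrarily chosen struct shape:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      Type *Fields[] = { I32, I32 };
      StructType *ST = StructType::get(Ctx, Fields);

      // Inserting into an undef struct folds to { i32 42, i32 undef }: the undef
      // aggregate is broken into per-field undefs and index 0 is replaced.
      unsigned Idx[] = { 0 };
      Constant *Ins = ConstantExpr::getInsertValue(UndefValue::get(ST),
                                                   ConstantInt::get(I32, 42),
                                                   Idx);
      Ins->print(errs());
      errs() << "\n";
      return 0;
    }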
@@ -1172,7 +1032,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
// At this point we know neither constant is an UndefValue.
if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
- using namespace APIntOps;
const APInt &C1V = CI1->getValue();
const APInt &C2V = CI2->getValue();
switch (Opcode) {
@@ -1269,145 +1128,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
} else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
- ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
- if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
- (CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
- std::vector<Constant*> Res;
- Type* EltTy = VTy->getElementType();
- Constant *C1 = 0;
- Constant *C2 = 0;
- switch (Opcode) {
- default:
- break;
- case Instruction::Add:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAdd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FAdd:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFAdd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Sub:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSub(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FSub:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFSub(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Mul:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getMul(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FMul:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFMul(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::UDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getUDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::SDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::URem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getURem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::SRem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSRem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FRem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFRem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::And:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAnd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Or:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getOr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Xor:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getXor(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::LShr:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getLShr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::AShr:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAShr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Shl:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getShl(C1, C2));
- }
- return ConstantVector::get(Res);
- }
+ // Perform elementwise folding.
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *LHS = C1->getAggregateElement(i);
+ Constant *RHS = C2->getAggregateElement(i);
+ if (LHS == 0 || RHS == 0) break;
+
+ Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
}
+
+ if (Result.size() == VTy->getNumElements())
+ return ConstantVector::get(Result);
}
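The replacement loop above collapses sixteen near-identical per-opcode loops into one elementwise pass over getAggregateElement plus a single ConstantExpr::get call. The observable effect, sketched under the same header-path assumptions as the other snippets here:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      Constant *A[] = { ConstantInt::get(I32, 1), ConstantInt::get(I32, 2) };
      Constant *B[] = { ConstantInt::get(I32, 10), ConstantInt::get(I32, 20) };
      // add <2 x i32> <1, 2>, <10, 20> folds elementwise to <11, 22>.
      Constant *Sum = ConstantExpr::getAdd(ConstantVector::get(A),
                                           ConstantVector::get(B));
      Sum->print(errs());
      errs() << "\n";
      return 0;
    }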
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
@@ -1906,7 +1638,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
APInt V1 = cast<ConstantInt>(C1)->getValue();
APInt V2 = cast<ConstantInt>(C2)->getValue();
switch (pred) {
- default: llvm_unreachable("Invalid ICmp Predicate"); return 0;
+ default: llvm_unreachable("Invalid ICmp Predicate");
case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
@@ -1923,7 +1655,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
APFloat::cmpResult R = C1V.compare(C2V);
switch (pred) {
- default: llvm_unreachable("Invalid FCmp Predicate"); return 0;
+ default: llvm_unreachable("Invalid FCmp Predicate");
case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
case FCmpInst::FCMP_UNO:
@@ -1962,20 +1694,20 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
R==APFloat::cmpEqual);
}
} else if (C1->getType()->isVectorTy()) {
- SmallVector<Constant*, 16> C1Elts, C2Elts;
- C1->getVectorElements(C1Elts);
- C2->getVectorElements(C2Elts);
- if (C1Elts.empty() || C2Elts.empty())
- return 0;
-
// If we can constant fold the comparison of each element, constant fold
// the whole vector comparison.
SmallVector<Constant*, 4> ResElts;
// Compare the elements, producing an i1 result or constant expr.
- for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
- ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
-
- return ConstantVector::get(ResElts);
+ for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){
+ Constant *C1E = C1->getAggregateElement(i);
+ Constant *C2E = C2->getAggregateElement(i);
+ if (C1E == 0 || C2E == 0) break;
+
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
+ }
+
+ if (ResElts.size() == C1->getType()->getVectorNumElements())
+ return ConstantVector::get(ResElts);
}
if (C1->getType()->isFloatingPointTy()) {
@@ -2209,7 +1941,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a84a046bb998..6dbc1449f245 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -40,6 +40,8 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
+void Constant::anchor() { }
+
bool Constant::isNegativeZeroValue() const {
// Floating point values have an explicit -0.0 value.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
@@ -71,17 +73,27 @@ bool Constant::isAllOnesValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
- // Check for constant vectors
+ // Check for constant vectors which are splats of -1 values.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- return CV->isAllOnesValue();
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+ // Check for constant data vectors which are splats of -1 values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
return false;
}
+
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
+ case Type::HalfTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEhalf));
case Type::FloatTyID:
return ConstantFP::get(Ty->getContext(),
APFloat::getZero(APFloat::IEEEsingle));
@@ -105,8 +117,7 @@ Constant *Constant::getNullValue(Type *Ty) {
return ConstantAggregateZero::get(Ty);
default:
// Function, Label, or Opaque type?
- assert(0 && "Cannot create a null constant of that type!");
- return 0;
+ llvm_unreachable("Cannot create a null constant of that type!");
}
}
@@ -122,7 +133,7 @@ Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
// Broadcast a scalar to a vector, if necessary.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+ C = ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
@@ -138,13 +149,44 @@ Constant *Constant::getAllOnesValue(Type *Ty) {
return ConstantFP::get(Ty->getContext(), FL);
}
- SmallVector<Constant*, 16> Elts;
VectorType *VTy = cast<VectorType>(Ty);
- Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
- assert(Elts[0] && "Invalid AllOnes value!");
- return cast<ConstantVector>(ConstantVector::get(Elts));
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ getAllOnesValue(VTy->getElementType()));
}
+/// getAggregateElement - For aggregates (struct/array/vector) return the
+/// constant that corresponds to the specified element if possible, or null if
+/// not. This can return null if the element index is a ConstantExpr, or if
+/// 'this' is a constant expr.
+Constant *Constant::getAggregateElement(unsigned Elt) const {
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
+ return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
+ return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
+
+ if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
+ return CAZ->getElementValue(Elt);
+
+ if (const UndefValue *UV = dyn_cast<UndefValue>(this))
+ return UV->getElementValue(Elt);
+
+ if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
+ return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
+ return 0;
+}
+
+Constant *Constant::getAggregateElement(Constant *Elt) const {
+ assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ return getAggregateElement(CI->getZExtValue());
+ return 0;
+}
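getAggregateElement is the workhorse that the ConstantFold.cpp changes earlier in this patch lean on: one query that works uniformly across ConstantStruct/Array/Vector, ConstantAggregateZero, UndefValue and the new ConstantData* classes. A short usage sketch, assuming the 3.1-era header paths; it is not part of this import:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      Type *V4I32 = VectorType::get(I32, 4);

      Constant *Zero  = ConstantAggregateZero::get(V4I32);   // elements are i32 0
      Constant *Undef = UndefValue::get(V4I32);               // elements are i32 undef
      uint32_t Raw[]  = { 5, 6, 7, 8 };
      Constant *Data  = ConstantDataVector::get(Ctx, Raw);    // packed data vector

      Zero->getAggregateElement(0u)->print(errs());  errs() << "\n";
      Undef->getAggregateElement(1u)->print(errs()); errs() << "\n";
      Data->getAggregateElement(2u)->print(errs());  errs() << "\n";
      // For the operand-backed and data-backed kinds an out-of-range index
      // returns null instead of asserting, which is what lets the folders
      // above bail out gracefully.
      return 0;
    }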
+
+
void Constant::destroyConstantImpl() {
// When a Constant is destroyed, there may be lingering
// references to the constant by other constants in the constant pool. These
@@ -163,8 +205,7 @@ void Constant::destroyConstantImpl() {
}
#endif
assert(isa<Constant>(V) && "References remain to Constant being destroyed");
- Constant *CV = cast<Constant>(V);
- CV->destroyConstant();
+ cast<Constant>(V)->destroyConstant();
// The constant should remove itself from our use list...
assert((use_empty() || use_back() != V) && "Constant not removed!");
@@ -270,36 +311,6 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
return Result;
}
-
-/// getVectorElements - This method, which is only valid on constant of vector
-/// type, returns the elements of the vector in the specified smallvector.
-/// This handles breaking down a vector undef into undef elements, etc. For
-/// constant exprs and other cases we can't handle, we return an empty vector.
-void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
- assert(getType()->isVectorTy() && "Not a vector constant!");
-
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
- Elts.push_back(CV->getOperand(i));
- return;
- }
-
- VectorType *VT = cast<VectorType>(getType());
- if (isa<ConstantAggregateZero>(this)) {
- Elts.assign(VT->getNumElements(),
- Constant::getNullValue(VT->getElementType()));
- return;
- }
-
- if (isa<UndefValue>(this)) {
- Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType()));
- return;
- }
-
- // Unknown type, must be constant expr etc.
-}
-
-
/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
/// it. This involves recursively eliminating any dead users of the
/// constantexpr.
@@ -358,6 +369,8 @@ void Constant::removeDeadConstantUsers() const {
// ConstantInt
//===----------------------------------------------------------------------===//
+void ConstantInt::anchor() { }
+
ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
: Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
@@ -385,9 +398,8 @@ Constant *ConstantInt::getTrue(Type *Ty) {
}
assert(VTy->getElementType()->isIntegerTy(1) &&
"True must be vector of i1 or i1.");
- SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
- ConstantInt::getTrue(Ty->getContext()));
- return ConstantVector::get(Splat);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
}
Constant *ConstantInt::getFalse(Type *Ty) {
@@ -398,9 +410,8 @@ Constant *ConstantInt::getFalse(Type *Ty) {
}
assert(VTy->getElementType()->isIntegerTy(1) &&
"False must be vector of i1 or i1.");
- SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
- ConstantInt::getFalse(Ty->getContext()));
- return ConstantVector::get(Splat);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
}
@@ -424,18 +435,17 @@ Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(SmallVector<Constant*,
- 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantInt* ConstantInt::get(IntegerType* Ty, uint64_t V,
+ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V,
bool isSigned) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
}
-ConstantInt* ConstantInt::getSigned(IntegerType* Ty, int64_t V) {
+ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
return get(Ty, V, true);
}
@@ -443,20 +453,19 @@ Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
return get(Ty, V, true);
}
-Constant *ConstantInt::get(Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(Type *Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str,
+ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
uint8_t radix) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
}
@@ -466,6 +475,8 @@ ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str,
//===----------------------------------------------------------------------===//
static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
+ if (Ty->isHalfTy())
+ return &APFloat::IEEEhalf;
if (Ty->isFloatTy())
return &APFloat::IEEEsingle;
if (Ty->isDoubleTy())
@@ -479,10 +490,12 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
return &APFloat::PPCDoubleDouble;
}
+void ConstantFP::anchor() { }
+
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant *ConstantFP::get(Type* Ty, double V) {
+Constant *ConstantFP::get(Type *Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
@@ -493,14 +506,13 @@ Constant *ConstantFP::get(Type* Ty, double V) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-Constant *ConstantFP::get(Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(Type *Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -508,31 +520,28 @@ Constant *ConstantFP::get(Type* Ty, StringRef Str) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantFP* ConstantFP::getNegativeZero(Type* Ty) {
+ConstantFP *ConstantFP::getNegativeZero(Type *Ty) {
LLVMContext &Context = Ty->getContext();
- APFloat apf = cast <ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
apf.changeSign();
return get(Context, apf);
}
-Constant *ConstantFP::getZeroValueForNegation(Type* Ty) {
- if (VectorType *PTy = dyn_cast<VectorType>(Ty))
- if (PTy->getElementType()->isFloatingPointTy()) {
- SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
- getNegativeZero(PTy->getElementType()));
- return ConstantVector::get(zeros);
- }
-
- if (Ty->isFloatingPointTy())
- return getNegativeZero(Ty);
+Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
+ Type *ScalarTy = Ty->getScalarType();
+ if (ScalarTy->isFloatingPointTy()) {
+ Constant *C = getNegativeZero(ScalarTy);
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+ return C;
+ }
return Constant::getNullValue(Ty);
}
@@ -548,7 +557,9 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
if (!Slot) {
Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEsingle)
+ if (&V.getSemantics() == &APFloat::IEEEhalf)
+ Ty = Type::getHalfTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEsingle)
Ty = Type::getFloatTy(Context);
else if (&V.getSemantics() == &APFloat::IEEEdouble)
Ty = Type::getDoubleTy(Context);
@@ -584,9 +595,83 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
}
//===----------------------------------------------------------------------===//
+// ConstantAggregateZero Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this CAZ has array or vector type, return a zero
+/// with the right element type.
+Constant *ConstantAggregateZero::getSequentialElement() const {
+ return Constant::getNullValue(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this CAZ has struct type, return a zero with the
+/// right element type for the specified element.
+Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const {
+ return Constant::getNullValue(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return a zero of the right type for the specified GEP
+/// index if we can; otherwise return null (e.g. if C is a ConstantExpr).
+Constant *ConstantAggregateZero::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return a zero of the right type for the specified GEP
+/// index.
+Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+//===----------------------------------------------------------------------===//
+// UndefValue Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this undef has array or vector type, return an
+/// undef with the right element type.
+UndefValue *UndefValue::getSequentialElement() const {
+ return UndefValue::get(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this undef has struct type, return an undef with the
+/// right element type for the specified element.
+UndefValue *UndefValue::getStructElement(unsigned Elt) const {
+ return UndefValue::get(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return an undef of the right type for the specified GEP
+/// index if we can; otherwise return null (e.g. if C is a ConstantExpr).
+UndefValue *UndefValue::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return an undef of the right type for the specified GEP
+/// index.
+UndefValue *UndefValue::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+
+//===----------------------------------------------------------------------===//
// ConstantXXX Classes
//===----------------------------------------------------------------------===//
+template <typename ItTy, typename EltTy>
+static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
+ for (; Start != End; ++Start)
+ if (*Start != Elt)
+ return false;
+ return true;
+}
ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantArrayVal,
@@ -601,45 +686,97 @@ ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
}
Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
+ // Empty arrays are canonicalized to ConstantAggregateZero.
+ if (V.empty())
+ return ConstantAggregateZero::get(Ty);
+
for (unsigned i = 0, e = V.size(); i != e; ++i) {
assert(V[i]->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
}
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- // If this is an all-zero array, return a ConstantAggregateZero object
- if (!V.empty()) {
- Constant *C = V[0];
- if (!C->isNullValue())
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
-
- for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C)
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
- }
- return ConstantAggregateZero::get(Ty);
-}
+ // If this is an all-zero array, return a ConstantAggregateZero object; if it
+ // is all undef, return an UndefValue; and if all of the elements are
+ // "simple" (ConstantInt/ConstantFP), return a ConstantDataArray.
+ Constant *C = V[0];
+ if (isa<UndefValue>(C) && rangeOnlyContains(V.begin(), V.end(), C))
+ return UndefValue::get(Ty);
-/// ConstantArray::get(const string&) - Return an array that is initialized to
-/// contain the specified string. If length is zero then a null terminator is
-/// added to the specified string so that it may be used in a natural way.
-/// Otherwise, the length parameter specifies how much of the string to use
-/// and it won't be null terminated.
-///
-Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
- bool AddNull) {
- std::vector<Constant*> ElementVals;
- ElementVals.reserve(Str.size() + size_t(AddNull));
- for (unsigned i = 0; i < Str.size(); ++i)
- ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
-
- // Add a null terminator to the string...
- if (AddNull) {
- ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
+ if (C->isNullValue() && rangeOnlyContains(V.begin(), V.end(), C))
+ return ConstantAggregateZero::get(Ty);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataArray. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
}
- ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size());
- return get(ATy, ElementVals);
+ // Otherwise, we really do want to create a ConstantArray.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
}
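The net effect of the rewritten ConstantArray::get is that a request for a "simple" array rarely produces a ConstantArray node at all: all-undef input becomes UndefValue, all-zero input becomes ConstantAggregateZero, and homogeneous i8/i16/i32/i64/float/double element lists become the new packed ConstantDataArray. A sketch of the observable canonicalization, illustrative only:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I8 = Type::getInt8Ty(Ctx);
      ArrayType *ATy = ArrayType::get(I8, 3);

      Constant *Chars[] = { ConstantInt::get(I8, 'h'), ConstantInt::get(I8, 'i'),
                            ConstantInt::get(I8, 0) };
      Constant *Arr = ConstantArray::get(ATy, Chars);
      errs() << "simple i8 elements -> ConstantDataArray? "
             << (isa<ConstantDataArray>(Arr) ? "yes" : "no") << "\n";

      Constant *Zeros[] = { ConstantInt::get(I8, 0), ConstantInt::get(I8, 0),
                            ConstantInt::get(I8, 0) };
      errs() << "all-zero elements -> ConstantAggregateZero? "
             << (isa<ConstantAggregateZero>(ConstantArray::get(ATy, Zeros)) ? "yes" : "no")
             << "\n";
      return 0;
    }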
/// getTypeForElements - Return an anonymous struct type to use for a constant
@@ -647,9 +784,10 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
ArrayRef<Constant*> V,
bool Packed) {
- SmallVector<Type*, 16> EltTypes;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- EltTypes.push_back(V[i]->getType());
+ unsigned VecSize = V.size();
+ SmallVector<Type*, 16> EltTypes(VecSize);
+ for (unsigned i = 0; i != VecSize; ++i)
+ EltTypes[i] = V[i]->getType();
return StructType::get(Context, EltTypes, Packed);
}
@@ -677,14 +815,31 @@ ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
// ConstantStruct accessors.
Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
- // Create a ConstantAggregateZero value if all elements are zeros.
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (!V[i]->isNullValue())
- return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
-
assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
"Incorrect # elements specified to ConstantStruct::get");
- return ConstantAggregateZero::get(ST);
+
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ bool isZero = true;
+ bool isUndef = false;
+
+ if (!V.empty()) {
+ isUndef = isa<UndefValue>(V[0]);
+ isZero = V[0]->isNullValue();
+ if (isUndef || isZero) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ if (!V[i]->isNullValue())
+ isZero = false;
+ if (!isa<UndefValue>(V[i]))
+ isUndef = false;
+ }
+ }
+ }
+ if (isZero)
+ return ConstantAggregateZero::get(ST);
+ if (isUndef)
+ return UndefValue::get(ST);
+
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
}
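ConstantStruct::get now canonicalizes in both directions: an all-zero element list collapses to ConstantAggregateZero and an all-undef list to UndefValue, so only genuinely mixed contents allocate a ConstantStruct node. A small check of that behaviour, with an arbitrary struct shape:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      Type *Fields[] = { I32, I32 };
      StructType *ST = StructType::get(Ctx, Fields);

      Constant *Zeros[]  = { ConstantInt::get(I32, 0), ConstantInt::get(I32, 0) };
      Constant *Undefs[] = { UndefValue::get(I32), UndefValue::get(I32) };

      errs() << (isa<ConstantAggregateZero>(ConstantStruct::get(ST, Zeros))
                     ? "all-zero collapses to CAZ\n" : "unexpected\n");
      errs() << (isa<UndefValue>(ConstantStruct::get(ST, Undefs))
                     ? "all-undef collapses to undef\n" : "unexpected\n");
      return 0;
    }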
Constant *ConstantStruct::get(StructType *T, ...) {
@@ -731,10 +886,93 @@ Constant *ConstantVector::get(ArrayRef<Constant*> V) {
return ConstantAggregateZero::get(T);
if (isUndef)
return UndefValue::get(T);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, the element type isn't compatible with ConstantDataVector, or
+ // the operand list contains a ConstantExpr or something else strange.
return pImpl->VectorConstants.getOrCreate(T, V);
}
+Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
+ // If this splat is compatible with ConstantDataVector, use it instead of
+ // ConstantVector.
+ if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
+ ConstantDataSequential::isElementTypeCompatible(V->getType()))
+ return ConstantDataVector::getSplat(NumElts, V);
+
+ SmallVector<Constant*, 32> Elts(NumElts, V);
+ return get(Elts);
+}
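getSplat is the new preferred way to broadcast a scalar: when the element is a plain ConstantInt or ConstantFP of a ConstantData-compatible type, the splat is stored in the packed ConstantDataVector form rather than as N identical operands. A quick sketch under the same assumptions as above:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Constant *Splat =
          ConstantVector::getSplat(16, ConstantInt::get(Type::getInt32Ty(Ctx), 7));
      errs() << (isa<ConstantDataVector>(Splat) ? "packed splat" : "operand-based splat")
             << "\n";
      Splat->print(errs());
      errs() << "\n";
      return 0;
    }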
+
+
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
// Constant.h
@@ -793,66 +1031,16 @@ unsigned ConstantExpr::getPredicate() const {
/// one, but with the specified operand set to the specified value.
Constant *
ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
- assert(OpNo < getNumOperands() && "Operand num is out of range!");
assert(Op->getType() == getOperand(OpNo)->getType() &&
"Replacing operand with value of different type!");
if (getOperand(OpNo) == Op)
return const_cast<ConstantExpr*>(this);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ NewOps.push_back(i == OpNo ? Op : getOperand(i));
- Constant *Op0, *Op1, *Op2;
- switch (getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- return ConstantExpr::getCast(getOpcode(), Op, getType());
- case Instruction::Select:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getSelect(Op0, Op1, Op2);
- case Instruction::InsertElement:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getInsertElement(Op0, Op1, Op2);
- case Instruction::ExtractElement:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- return ConstantExpr::getExtractElement(Op0, Op1);
- case Instruction::ShuffleVector:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getShuffleVector(Op0, Op1, Op2);
- case Instruction::GetElementPtr: {
- SmallVector<Constant*, 8> Ops;
- Ops.resize(getNumOperands()-1);
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
- Ops[i-1] = getOperand(i);
- if (OpNo == 0)
- return
- ConstantExpr::getGetElementPtr(Op, Ops,
- cast<GEPOperator>(this)->isInBounds());
- Ops[OpNo-1] = Op;
- return
- ConstantExpr::getGetElementPtr(getOperand(0), Ops,
- cast<GEPOperator>(this)->isInBounds());
- }
- default:
- assert(getNumOperands() == 2 && "Must be binary operator?");
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassOptionalData);
- }
+ return getWithOperands(NewOps);
}
/// getWithOperands - This returns the current constant expression with the
@@ -888,12 +1076,15 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(Ops[0], getIndices());
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- return
- ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
- cast<GEPOperator>(this)->isInBounds());
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ cast<GEPOperator>(this)->isInBounds());
case Instruction::ICmp:
case Instruction::FCmp:
return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
@@ -908,8 +1099,8 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
// isValueValidForType implementations
bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::getInt1Ty(Ty->getContext()))
+ unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay
+ if (Ty->isIntegerTy(1))
return Val == 0 || Val == 1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -918,8 +1109,8 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
}
bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::getInt1Ty(Ty->getContext()))
+ unsigned NumBits = Ty->getIntegerBitWidth();
+ if (Ty->isIntegerTy(1))
return Val == 0 || Val == 1 || Val == -1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -937,6 +1128,12 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
return false; // These can't be represented as floating point!
// FIXME rounding mode needs to be more flexible
+ case Type::HalfTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf)
+ return true;
+ Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
case Type::FloatTyID: {
if (&Val2.getSemantics() == &APFloat::IEEEsingle)
return true;
@@ -944,42 +1141,50 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
return !losesInfo;
}
case Type::DoubleTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEsingle ||
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble)
return true;
Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
case Type::X86_FP80TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::x87DoubleExtended;
case Type::FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::IEEEquad;
case Type::PPC_FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::PPCDoubleDouble;
}
}
+
//===----------------------------------------------------------------------===//
// Factory Function Implementation
-ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) {
+ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- return pImpl->AggZeroConstants.getOrCreate(Ty, 0);
+ ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantAggregateZero(Ty);
+
+ return Entry;
}
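The storage change here swaps the old getOrCreate machinery for a simple per-type map (CAZConstants), so there is still exactly one ConstantAggregateZero per type. A trivial check of the uniquing, illustrative only:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Type *ATy = ArrayType::get(Type::getInt32Ty(Ctx), 8);
      ConstantAggregateZero *A = ConstantAggregateZero::get(ATy);
      ConstantAggregateZero *B = ConstantAggregateZero::get(ATy);
      errs() << (A == B ? "uniqued: same pointer\n" : "not uniqued\n");
      return 0;
    }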
-/// destroyConstant - Remove the constant from the constant table...
+/// destroyConstant - Remove the constant from the constant table.
///
void ConstantAggregateZero::destroyConstant() {
- getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ getContext().pImpl->CAZConstants.erase(getType());
destroyConstantImpl();
}
@@ -990,69 +1195,6 @@ void ConstantArray::destroyConstant() {
destroyConstantImpl();
}
-/// isString - This method returns true if the array is an array of i8, and
-/// if the elements of the array are all ConstantInt's.
-bool ConstantArray::isString() const {
- // Check the element type for i8...
- if (!getType()->getElementType()->isIntegerTy(8))
- return false;
- // Check the elements to make sure they are all integers, not constant
- // expressions.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!isa<ConstantInt>(getOperand(i)))
- return false;
- return true;
-}
-
-/// isCString - This method returns true if the array is a string (see
-/// isString) and it ends in a null byte \\0 and does not contains any other
-/// null bytes except its terminator.
-bool ConstantArray::isCString() const {
- // Check the element type for i8...
- if (!getType()->getElementType()->isIntegerTy(8))
- return false;
-
- // Last element must be a null.
- if (!getOperand(getNumOperands()-1)->isNullValue())
- return false;
- // Other elements must be non-null integers.
- for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
- if (!isa<ConstantInt>(getOperand(i)))
- return false;
- if (getOperand(i)->isNullValue())
- return false;
- }
- return true;
-}
-
-
-/// convertToString - Helper function for getAsString() and getAsCString().
-static std::string convertToString(const User *U, unsigned len) {
- std::string Result;
- Result.reserve(len);
- for (unsigned i = 0; i != len; ++i)
- Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue());
- return Result;
-}
-
-/// getAsString - If this array is isString(), then this method converts the
-/// array to an std::string and returns it. Otherwise, it asserts out.
-///
-std::string ConstantArray::getAsString() const {
- assert(isString() && "Not a string!");
- return convertToString(this, getNumOperands());
-}
-
-
-/// getAsCString - If this array is isCString(), then this method converts the
-/// array (without the trailing null byte) to an std::string and returns it.
-/// Otherwise, it asserts out.
-///
-std::string ConstantArray::getAsCString() const {
- assert(isCString() && "Not a string!");
- return convertToString(this, getNumOperands() - 1);
-}
-
//---- ConstantStruct::get() implementation...
//
@@ -1071,26 +1213,6 @@ void ConstantVector::destroyConstant() {
destroyConstantImpl();
}
-/// This function will return true iff every element in this vector constant
-/// is set to all ones.
-/// @returns true iff this constant's elements are all set to all ones.
-/// @brief Determine if the value is all ones.
-bool ConstantVector::isAllOnesValue() const {
- // Check out first element.
- const Constant *Elt = getOperand(0);
- const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- const ConstantFP *CF = dyn_cast<ConstantFP>(Elt);
-
- // Then make sure all remaining elements point to the same value.
- for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
- if (getOperand(I) != Elt)
- return false;
-
- // First value is all-ones.
- return (CI && CI->isAllOnesValue()) ||
- (CF && CF->isAllOnesValue());
-}
-
/// getSplatValue - If this is a splat constant, where all of the
/// elements have the same value, return that value. Otherwise return null.
Constant *ConstantVector::getSplatValue() const {
@@ -1107,13 +1229,18 @@ Constant *ConstantVector::getSplatValue() const {
//
ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
- return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
+ ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantPointerNull(Ty);
+
+ return Entry;
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
- getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ getContext().pImpl->CPNConstants.erase(getType());
+ // Free the constant and any dangling references to it.
destroyConstantImpl();
}
@@ -1122,13 +1249,18 @@ void ConstantPointerNull::destroyConstant() {
//
UndefValue *UndefValue::get(Type *Ty) {
- return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
+ UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
+ if (Entry == 0)
+ Entry = new UndefValue(Ty);
+
+ return Entry;
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ // Free the constant and any dangling references to it.
+ getContext().pImpl->UVConstants.erase(getType());
destroyConstantImpl();
}
@@ -1236,7 +1368,6 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
switch (opc) {
default:
llvm_unreachable("Invalid cast opcode");
- break;
case Instruction::Trunc: return getTrunc(C, Ty);
case Instruction::ZExt: return getZExt(C, Ty);
case Instruction::SExt: return getSExt(C, Ty);
@@ -1250,7 +1381,6 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
case Instruction::IntToPtr: return getIntToPtr(C, Ty);
case Instruction::BitCast: return getBitCast(C, Ty);
}
- return 0;
}
Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
@@ -1416,14 +1546,26 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
}
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
- assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
- assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
- assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
- assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
@@ -1603,7 +1745,7 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
// Get the result type of the getelementptr!
Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
assert(Ty && "GEP indices invalid!");
- unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+ unsigned AS = C->getType()->getPointerAddressSpace();
Type *ReqTy = Ty->getPointerTo(AS);
assert(C->getType()->isPointerTy() &&
@@ -1683,7 +1825,7 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
- Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
+ Type *ReqTy = Val->getType()->getVectorElementType();
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
@@ -1691,8 +1833,8 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
Constant *Idx) {
assert(Val->getType()->isVectorTy() &&
"Tried to create insertelement operation on non-vector type!");
- assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
- && "Insertelement types must match!");
+ assert(Elt->getType() == Val->getType()->getVectorElementType() &&
+ "Insertelement types must match!");
assert(Idx->getType()->isIntegerTy(32) &&
"Insertelement index must be i32 type!");
@@ -1716,8 +1858,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
return FC; // Fold a few common cases.
- unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
- Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ unsigned NElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
Type *ShufTy = VectorType::get(EltTy, NElts);
// Look up the constant in the table first to ensure uniqueness
@@ -1879,7 +2021,7 @@ const char *ConstantExpr::getOpcodeName() const {
GetElementPtrConstantExpr::
-GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
Type *DestTy)
: ConstantExpr(DestTy, Instruction::GetElementPtr,
OperandTraits<GetElementPtrConstantExpr>::op_end(this)
@@ -1889,6 +2031,341 @@ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
OperandList[i+1] = IdxList[i];
}
+//===----------------------------------------------------------------------===//
+// ConstantData* implementations
+
+void ConstantDataArray::anchor() {}
+void ConstantDataVector::anchor() {}
+
+/// getElementType - Return the element type of the array/vector.
+Type *ConstantDataSequential::getElementType() const {
+ return getType()->getElementType();
+}
+
+StringRef ConstantDataSequential::getRawDataValues() const {
+ return StringRef(DataElements, getNumElements()*getElementByteSize());
+}
+
+/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+/// formed with a vector or array of the specified element type.
+/// ConstantDataArray only works with normal float and int types that are
+/// stored densely in memory, not with things like i42 or x86_f80.
+bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
+ if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+ switch (IT->getBitWidth()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default: break;
+ }
+ }
+ return false;
+}
+
+/// getNumElements - Return the number of elements in the array or vector.
+unsigned ConstantDataSequential::getNumElements() const {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
+ return AT->getNumElements();
+ return getType()->getVectorNumElements();
+}
+
+
+/// getElementByteSize - Return the size in bytes of the elements in the data.
+uint64_t ConstantDataSequential::getElementByteSize() const {
+ return getElementType()->getPrimitiveSizeInBits()/8;
+}
+
+/// getElementPointer - Return the start of the specified element.
+const char *ConstantDataSequential::getElementPointer(unsigned Elt) const {
+ assert(Elt < getNumElements() && "Invalid Elt");
+ return DataElements+Elt*getElementByteSize();
+}
+
+
+/// isAllZeros - return true if the array is empty or all zeros.
+static bool isAllZeros(StringRef Arr) {
+ for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I)
+ if (*I != 0)
+ return false;
+ return true;
+}
+
+/// getImpl - This is the underlying implementation of all of the
+/// ConstantDataSequential::get methods. They all thunk down to here, providing
+/// the correct element type. We take the bytes in as a StringRef because
+/// we *want* an underlying "char*" to avoid TBAA type punning violations.
+Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
+ assert(isElementTypeCompatible(Ty->getSequentialElementType()));
+ // If the elements are all zero or there are no elements, return a CAZ, which
+ // is more dense and canonical.
+ if (isAllZeros(Elements))
+ return ConstantAggregateZero::get(Ty);
+
+ // Do a lookup to see if we have already formed one of these.
+ StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
+ Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
+
+ // The bucket can point to a linked list of different CDS's that have the same
+ // body but different types. For example, 0,0,0,1 could be a 4-element array
+ // of i8, or a 1-element array of i32. They'll both end up in the same
+ // StringMap bucket, linked up by their Next pointers. Walk the list.
+ ConstantDataSequential **Entry = &Slot.getValue();
+ for (ConstantDataSequential *Node = *Entry; Node != 0;
+ Entry = &Node->Next, Node = *Entry)
+ if (Node->getType() == Ty)
+ return Node;
+
+ // Okay, we didn't get a hit. Create a node of the right class, link it in,
+ // and return it.
+ if (isa<ArrayType>(Ty))
+ return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
+
+ assert(isa<VectorType>(Ty));
+ return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
+}
+
+void ConstantDataSequential::destroyConstant() {
+ // Remove the constant from the StringMap.
+ StringMap<ConstantDataSequential*> &CDSConstants =
+ getType()->getContext().pImpl->CDSConstants;
+
+ StringMap<ConstantDataSequential*>::iterator Slot =
+ CDSConstants.find(getRawDataValues());
+
+ assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
+
+ ConstantDataSequential **Entry = &Slot->getValue();
+
+ // Remove the entry from the hash table.
+ if ((*Entry)->Next == 0) {
+ // If there is only one value in the bucket (common case) it must be this
+ // entry, and removing the entry should remove the bucket completely.
+ assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
+ getContext().pImpl->CDSConstants.erase(Slot);
+ } else {
+ // Otherwise, there are multiple entries linked off the bucket; unlink the
+ // node we care about but keep the bucket around.
+ for (ConstantDataSequential *Node = *Entry; ;
+ Entry = &Node->Next, Node = *Entry) {
+ assert(Node && "Didn't find entry in its uniquing hash table!");
+ // If we found our entry, unlink it from the list and we're done.
+ if (Node == this) {
+ *Entry = Node->Next;
+ break;
+ }
+ }
+ }
+
+ // If we were part of a list, make sure that we don't delete the list that is
+ // still owned by the uniquing map.
+ Next = 0;
+
+ // Finally, actually delete it.
+ destroyConstantImpl();
+}
+
+/// get() constructors - Return a constant with array type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+
+/// getString - This method constructs a CDS and initializes it with a text
+/// string. The default behavior (AddNull==true) causes a null terminator to
+/// be placed at the end of the array (increasing the length of the string by
+/// one more than the StringRef would normally indicate). Pass AddNull=false
+/// to disable this behavior.
+Constant *ConstantDataArray::getString(LLVMContext &Context,
+ StringRef Str, bool AddNull) {
+ if (!AddNull)
+ return get(Context, ArrayRef<uint8_t>((uint8_t*)Str.data(), Str.size()));
+
+ SmallVector<uint8_t, 64> ElementVals;
+ ElementVals.append(Str.begin(), Str.end());
+ ElementVals.push_back(0);
+ return get(Context, ElementVals);
+}
+
+/// get() constructors - Return a constant with vector type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+
+Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
+ assert(isElementTypeCompatible(V->getType()) &&
+ "Element type not compatible with ConstantData");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type");
+ SmallVector<uint64_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
+ return get(V->getContext(), Elts);
+ }
+ if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts(NumElts,
+ CFP->getValueAPF().convertToDouble());
+ return get(V->getContext(), Elts);
+ }
+ }
+ return ConstantVector::getSplat(NumElts, V);
+}
+
+
+/// getElementAsInteger - If this is a sequential container of integers (of
+/// any size), return the specified element in the low bits of a uint64_t.
+uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
+ assert(isa<IntegerType>(getElementType()) &&
+ "Accessor can only be used when element is an integer");
+ const char *EltPtr = getElementPointer(Elt);
+
+ // The data is stored in host byte order; make sure to cast back to the right
+ // type to load with the right endianness.
+ switch (getElementType()->getIntegerBitWidth()) {
+ default: llvm_unreachable("Invalid bitwidth for CDS");
+ case 8: return *(uint8_t*)EltPtr;
+ case 16: return *(uint16_t*)EltPtr;
+ case 32: return *(uint32_t*)EltPtr;
+ case 64: return *(uint64_t*)EltPtr;
+ }
+}
+
+/// getElementAsAPFloat - If this is a sequential container of floating point
+/// type, return the specified element as an APFloat.
+APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
+ const char *EltPtr = getElementPointer(Elt);
+
+ switch (getElementType()->getTypeID()) {
+ default:
+ llvm_unreachable("Accessor can only be used when element is float/double!");
+ case Type::FloatTyID: return APFloat(*(float*)EltPtr);
+ case Type::DoubleTyID: return APFloat(*(double*)EltPtr);
+ }
+}
+
+/// getElementAsFloat - If this is a sequential container of floats, return
+/// the specified element as a float.
+float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
+ assert(getElementType()->isFloatTy() &&
+ "Accessor can only be used when element is a 'float'");
+ return *(float*)getElementPointer(Elt);
+}
+
+/// getElementAsDouble - If this is a sequential container of doubles, return
+/// the specified element as a double.
+double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
+ assert(getElementType()->isDoubleTy() &&
+ "Accessor can only be used when element is a 'double'");
+ return *(double*)getElementPointer(Elt);
+}
+
+/// getElementAsConstant - Return a Constant for a specified index's element.
+/// Note that this has to compute a new constant to return, so it isn't as
+/// efficient as getElementAsInteger/Float/Double.
+Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
+ if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
+ return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
+
+ return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
+}
+
+/// isString - This method returns true if this is an array of i8.
+bool ConstantDataSequential::isString() const {
+ return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
+}
+
+/// isCString - This method returns true if the array "isString", ends with a
+/// nul byte, and does not contain any other nul bytes.
+bool ConstantDataSequential::isCString() const {
+ if (!isString())
+ return false;
+
+ StringRef Str = getAsString();
+
+ // The last value must be nul.
+ if (Str.back() != 0) return false;
+
+ // Other elements must be non-nul.
+ return Str.drop_back().find(0) == StringRef::npos;
+}
+
+/// getSplatValue - If this is a splat constant, meaning that all of the
+/// elements have the same value, return that value. Otherwise return NULL.
+Constant *ConstantDataVector::getSplatValue() const {
+ const char *Base = getRawDataValues().data();
+
+ // Compare elements 1+ to the 0'th element.
+ unsigned EltSize = getElementByteSize();
+ for (unsigned i = 1, e = getNumElements(); i != e; ++i)
+ if (memcmp(Base, Base+i*EltSize, EltSize))
+ return 0;
+
+ // If they're all the same, return the 0th one as a representative.
+ return getElementAsConstant(0);
+}
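
Taken together, the ConstantData* routines above replace per-element ConstantArray/ConstantVector handling for dense numeric data. A minimal usage sketch, assuming LLVM 3.1-era headers; the function and variable names (buildDenseConstants, Vals) are illustrative:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Support/Casting.h"
  #include <stdint.h>
  using namespace llvm;

  // Sketch only: build dense constants through the new ConstantData* API and
  // read an element back via the ConstantDataSequential accessors.
  static void buildDenseConstants(LLVMContext &Ctx) {
    uint32_t Vals[] = { 0, 1, 2, 3 };
    Constant *Arr = ConstantDataArray::get(Ctx, ArrayRef<uint32_t>(Vals, 4));
    Constant *Str = ConstantDataArray::getString(Ctx, "hi", true); // nul added

    Constant *OneF = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
    Constant *Splat = ConstantDataVector::getSplat(4, OneF);       // <4 x float>

    if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Arr))
      (void)CDS->getElementAsInteger(3);                           // yields 3
    (void)Str; (void)Splat;
  }

Note that an all-zero input deliberately comes back as a ConstantAggregateZero rather than a ConstantDataSequential, which is why the sketch re-checks the result with dyn_cast.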
//===----------------------------------------------------------------------===//
// replaceUsesOfWithOnConstant implementations
@@ -1911,56 +2388,46 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
- Lookup.first.first = cast<ArrayType>(getType());
- Lookup.second = this;
-
- std::vector<Constant*> &Values = Lookup.first.second;
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<ArrayType>(getType());
Values.reserve(getNumOperands()); // Build replacement array.
// Fill values with the modified operands of the constant array. Also,
// compute whether this turns into an all-zeros array.
- bool isAllZeros = false;
unsigned NumUpdated = 0;
- if (!ToC->isNullValue()) {
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- if (Val == From) {
- Val = ToC;
- ++NumUpdated;
- }
- Values.push_back(Val);
- }
- } else {
- isAllZeros = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- if (Val == From) {
- Val = ToC;
- ++NumUpdated;
- }
- Values.push_back(Val);
- if (isAllZeros) isAllZeros = Val->isNullValue();
+
+ // Keep track of whether all the values in the array are "ToC".
+ bool AllSame = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
}
+ Values.push_back(Val);
+ AllSame &= Val == ToC;
}
Constant *Replacement = 0;
- if (isAllZeros) {
+ if (AllSame && ToC->isNullValue()) {
Replacement = ConstantAggregateZero::get(getType());
+ } else if (AllSame && isa<UndefValue>(ToC)) {
+ Replacement = UndefValue::get(getType());
} else {
// Check to see if we have this array type already.
- bool Exists;
+ Lookup.second = makeArrayRef(Values);
LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
- pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists);
+ pImpl->ArrayConstants.find(Lookup);
- if (Exists) {
- Replacement = I->second;
+ if (I != pImpl->ArrayConstants.map_end()) {
+ Replacement = I->first;
} else {
// Okay, the new shape doesn't exist in the system yet. Instead of
// creating a new constant array, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- pImpl->ArrayConstants.MoveConstantToNewSlot(this, I);
+ pImpl->ArrayConstants.remove(this);
// Update to the new value. Optimize for the case when we have a single
// operand that we're changing, but handle bulk updates efficiently.
@@ -1974,6 +2441,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
if (getOperand(i) == From)
setOperand(i, ToC);
}
+ pImpl->ArrayConstants.insert(this);
return;
}
}
@@ -1996,26 +2464,32 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
unsigned OperandToUpdate = U-OperandList;
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
- std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
- Lookup.first.first = cast<StructType>(getType());
- Lookup.second = this;
- std::vector<Constant*> &Values = Lookup.first.second;
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<StructType>(getType());
Values.reserve(getNumOperands()); // Build replacement struct.
-
// Fill values with the modified operands of the constant struct. Also,
// compute whether this turns into an all-zeros struct.
bool isAllZeros = false;
- if (!ToC->isNullValue()) {
- for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
- Values.push_back(cast<Constant>(O->get()));
- } else {
+ bool isAllUndef = false;
+ if (ToC->isNullValue()) {
isAllZeros = true;
for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
Constant *Val = cast<Constant>(O->get());
Values.push_back(Val);
if (isAllZeros) isAllZeros = Val->isNullValue();
}
+ } else if (isa<UndefValue>(ToC)) {
+ isAllUndef = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllUndef) isAllUndef = isa<UndefValue>(Val);
+ }
+ } else {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
}
Values[OperandToUpdate] = ToC;
@@ -2024,23 +2498,26 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
Constant *Replacement = 0;
if (isAllZeros) {
Replacement = ConstantAggregateZero::get(getType());
+ } else if (isAllUndef) {
+ Replacement = UndefValue::get(getType());
} else {
// Check to see if we have this struct type already.
- bool Exists;
+ Lookup.second = makeArrayRef(Values);
LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
- pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
+ pImpl->StructConstants.find(Lookup);
- if (Exists) {
- Replacement = I->second;
+ if (I != pImpl->StructConstants.map_end()) {
+ Replacement = I->first;
} else {
// Okay, the new shape doesn't exist in the system yet. Instead of
// creating a new constant struct, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- pImpl->StructConstants.MoveConstantToNewSlot(this, I);
+ pImpl->StructConstants.remove(this);
// Update to the new value.
setOperand(OperandToUpdate, ToC);
+ pImpl->StructConstants.insert(this);
return;
}
}
@@ -2058,7 +2535,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- std::vector<Constant*> Values;
+ SmallVector<Constant*, 8> Values;
Values.reserve(getNumOperands()); // Build replacement array...
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
Constant *Val = getOperand(i);
@@ -2081,89 +2558,13 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
Constant *To = cast<Constant>(ToV);
- Constant *Replacement = 0;
- if (getOpcode() == Instruction::GetElementPtr) {
- SmallVector<Constant*, 8> Indices;
- Constant *Pointer = getOperand(0);
- Indices.reserve(getNumOperands()-1);
- if (Pointer == From) Pointer = To;
-
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
- Constant *Val = getOperand(i);
- if (Val == From) Val = To;
- Indices.push_back(Val);
- }
- Replacement = ConstantExpr::getGetElementPtr(Pointer, Indices,
- cast<GEPOperator>(this)->isInBounds());
- } else if (getOpcode() == Instruction::ExtractValue) {
- Constant *Agg = getOperand(0);
- if (Agg == From) Agg = To;
-
- ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getExtractValue(Agg, Indices);
- } else if (getOpcode() == Instruction::InsertValue) {
- Constant *Agg = getOperand(0);
- Constant *Val = getOperand(1);
- if (Agg == From) Agg = To;
- if (Val == From) Val = To;
-
- ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
- } else if (isCast()) {
- assert(getOperand(0) == From && "Cast only has one use!");
- Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
- } else if (getOpcode() == Instruction::Select) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(2);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getSelect(C1, C2, C3);
- } else if (getOpcode() == Instruction::ExtractElement) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- Replacement = ConstantExpr::getExtractElement(C1, C2);
- } else if (getOpcode() == Instruction::InsertElement) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getInsertElement(C1, C2, C3);
- } else if (getOpcode() == Instruction::ShuffleVector) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(2);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getShuffleVector(C1, C2, C3);
- } else if (isCompare()) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (getOpcode() == Instruction::ICmp)
- Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
- else {
- assert(getOpcode() == Instruction::FCmp);
- Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
- }
- } else if (getNumOperands() == 2) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassOptionalData);
- } else {
- llvm_unreachable("Unknown ConstantExpr type!");
- return;
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Op = getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
}
+ Constant *Replacement = getWithOperands(NewOps);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 1077004d7c76..8903a8f40f95 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -15,6 +15,8 @@
#ifndef LLVM_CONSTANTSCONTEXT_H
#define LLVM_CONSTANTSCONTEXT_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Operator.h"
@@ -30,6 +32,7 @@ struct ConstantTraits;
/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement unary constant exprs.
class UnaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -46,6 +49,7 @@ public:
/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement binary constant exprs.
class BinaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly two operands
@@ -66,6 +70,7 @@ public:
/// SelectConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement select constant exprs.
class SelectConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -86,6 +91,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractelement constant exprs.
class ExtractElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly two operands
@@ -106,6 +112,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertelement constant exprs.
class InsertElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -127,6 +134,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// shufflevector constant exprs.
class ShuffleVectorConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -151,6 +159,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractvalue constant exprs.
class ExtractValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -176,6 +185,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertvalue constant exprs.
class InsertValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -202,11 +212,12 @@ public:
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr : public ConstantExpr {
- GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ virtual void anchor();
+ GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
Type *DestTy);
public:
static GetElementPtrConstantExpr *Create(Constant *C,
- const std::vector<Constant*>&IdxList,
+ ArrayRef<Constant*> IdxList,
Type *DestTy,
unsigned Flags) {
GetElementPtrConstantExpr *Result =
@@ -221,8 +232,10 @@ public:
// CompareConstantExpr - This class is private to Constants.cpp, and is used
// behind the scenes to implement ICmp and FCmp constant expressions. This is
// needed in order to store the predicate value for these instructions.
-struct CompareConstantExpr : public ConstantExpr {
+class CompareConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
// allocate space for exactly two operands
void *operator new(size_t s) {
return User::operator new(s, 2);
@@ -397,6 +410,13 @@ struct ConstantCreator {
}
};
+template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator {
+ static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
+ return new(V.size()) ConstantClass(Ty, V);
+ }
+};
+
template<class ConstantClass>
struct ConstantKeyData {
typedef void ValType;
@@ -447,7 +467,6 @@ struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
V.operands[0], V.operands[1]);
llvm_unreachable("Invalid ConstantExpr!");
- return 0;
}
};
@@ -467,90 +486,6 @@ struct ConstantKeyData<ConstantExpr> {
}
};
-// ConstantAggregateZero does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
- static ConstantAggregateZero *create(Type *Ty, const ValType &V){
- return new ConstantAggregateZero(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantVector> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantVector *CP) {
- std::vector<Constant*> Elements;
- Elements.reserve(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Elements.push_back(CP->getOperand(i));
- return Elements;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantAggregateZero> {
- typedef char ValType;
- static ValType getValType(ConstantAggregateZero *C) {
- return 0;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantArray> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantArray *CA) {
- std::vector<Constant*> Elements;
- Elements.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CA->getOperand(i)));
- return Elements;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantStruct> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantStruct *CS) {
- std::vector<Constant*> Elements;
- Elements.reserve(CS->getNumOperands());
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CS->getOperand(i)));
- return Elements;
- }
-};
-
-// ConstantPointerNull does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
- static ConstantPointerNull *create(PointerType *Ty, const ValType &V){
- return new ConstantPointerNull(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantPointerNull> {
- typedef char ValType;
- static ValType getValType(ConstantPointerNull *C) {
- return 0;
- }
-};
-
-// UndefValue does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<UndefValue, Type, ValType> {
- static UndefValue *create(Type *Ty, const ValType &V) {
- return new UndefValue(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<UndefValue> {
- typedef char ValType;
- static ValType getValType(UndefValue *C) {
- return 0;
- }
-};
-
template<>
struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
@@ -704,6 +639,129 @@ public:
}
};
+// Unique map for aggregate constants
+template<class TypeClass, class ConstantClass>
+class ConstantAggrUniqueMap {
+public:
+ typedef ArrayRef<Constant*> Operands;
+ typedef std::pair<TypeClass*, Operands> LookupKey;
+private:
+ struct MapInfo {
+ typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
+ typedef DenseMapInfo<Constant*> ConstantInfo;
+ typedef DenseMapInfo<TypeClass*> TypeClassInfo;
+ static inline ConstantClass* getEmptyKey() {
+ return ConstantClassInfo::getEmptyKey();
+ }
+ static inline ConstantClass* getTombstoneKey() {
+ return ConstantClassInfo::getTombstoneKey();
+ }
+ static unsigned getHashValue(const ConstantClass *CP) {
+ SmallVector<Constant*, 8> CPOperands;
+ CPOperands.reserve(CP->getNumOperands());
+ for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+ CPOperands.push_back(CP->getOperand(I));
+ return getHashValue(LookupKey(CP->getType(), CPOperands));
+ }
+ static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
+ return LHS == RHS;
+ }
+ static unsigned getHashValue(const LookupKey &Val) {
+ return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+ Val.second.end()));
+ }
+ static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ if (LHS.first != RHS->getType()
+ || LHS.second.size() != RHS->getNumOperands())
+ return false;
+ for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
+ if (LHS.second[I] != RHS->getOperand(I))
+ return false;
+ }
+ return true;
+ }
+ };
+public:
+ typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
+
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two constants of the same
+ /// shape.
+ MapTy Map;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->first;
+ }
+ }
+
+private:
+ typename MapTy::iterator findExistingElement(ConstantClass *CP) {
+ return Map.find(CP);
+ }
+
+ ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ Map[Result] = '\0';
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
+ LookupKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find_as(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->first;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ /// Find the constant by lookup key.
+ typename MapTy::iterator find(LookupKey Lookup) {
+ return Map.find_as(Lookup);
+ }
+
+ /// Insert the constant into its proper slot.
+ void insert(ConstantClass *CP) {
+ Map[CP] = '\0';
+ }
+
+ /// Remove this constant from the map
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = findExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->first == CP && "Didn't find correct element?");
+ Map.erase(I);
+ }
+
+ void dump() const {
+ DEBUG(dbgs() << "Constant.cpp: ConstantAggrUniqueMap\n");
+ }
+};
+
}
#endif
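
ConstantAggrUniqueMap replaces the old key-vector maps with a DenseMap keyed by the constants themselves; lookups by shape go through find_as with a (type, operands) LookupKey hashed via hash_combine. A minimal sketch of that hashing step in isolation, assuming the llvm/ADT/Hashing.h header this import relies on; hashAggregateKey is an illustrative name:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/Hashing.h"
  #include "llvm/Constants.h"
  #include "llvm/Type.h"

  // Illustrative only: hash a (type, operand-list) lookup key the same way
  // ConstantAggrUniqueMap::MapInfo::getHashValue does, so an aggregate can be
  // looked up by shape before one is ever constructed.
  static unsigned hashAggregateKey(llvm::Type *Ty,
                                   llvm::ArrayRef<llvm::Constant *> Ops) {
    return llvm::hash_combine(
        Ty, llvm::hash_combine_range(Ops.begin(), Ops.end()));
  }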
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index a505e4b4f5e5..a9cca22d0dd4 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Core.h"
+#include "llvm/Attributes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -132,10 +133,11 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
switch (unwrap(Ty)->getTypeID()) {
- default:
- assert(false && "Unhandled TypeID.");
+ default: llvm_unreachable("Unhandled TypeID.");
case Type::VoidTyID:
return LLVMVoidTypeKind;
+ case Type::HalfTyID:
+ return LLVMHalfTypeKind;
case Type::FloatTyID:
return LLVMFloatTypeKind;
case Type::DoubleTyID:
@@ -222,6 +224,9 @@ unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
/*--.. Operations on real types ............................................--*/
+LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getHalfTy(*unwrap(C));
+}
LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
}
@@ -241,6 +246,9 @@ LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
}
+LLVMTypeRef LLVMHalfType(void) {
+ return LLVMHalfTypeInContext(LLVMGetGlobalContext());
+}
LLVMTypeRef LLVMFloatType(void) {
return LLVMFloatTypeInContext(LLVMGetGlobalContext());
}
@@ -558,6 +566,17 @@ void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRe
Dest[i] = wrap(N->getOperand(i));
}
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+ LLVMValueRef Val)
+{
+ NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
+ if (!N)
+ return;
+ MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
+ if (Op)
+ N->addOperand(Op);
+}
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -614,8 +633,8 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
LLVMBool DontNullTerminate) {
/* Inverted the sense of AddNull because ', 0)' is a
better mnemonic for null termination than ', 1)'. */
- return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
- DontNullTerminate == 0));
+ return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
}
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
@@ -660,8 +679,7 @@ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
static LLVMOpcode map_to_llvmopcode(int opcode)
{
switch (opcode) {
- default:
- assert(0 && "Unhandled Opcode.");
+ default: llvm_unreachable("Unhandled Opcode.");
#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
#include "llvm/Instruction.def"
#undef HANDLE_INST
@@ -671,12 +689,11 @@ static LLVMOpcode map_to_llvmopcode(int opcode)
static int map_from_llvmopcode(LLVMOpcode code)
{
switch (code) {
- default:
- assert(0 && "Unhandled Opcode.");
#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
#include "llvm/Instruction.def"
#undef HANDLE_INST
}
+ llvm_unreachable("Unhandled Opcode.");
}
/*--.. Constant expressions ................................................--*/
@@ -1040,8 +1057,6 @@ LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
switch (unwrap<GlobalValue>(Global)->getLinkage()) {
- default:
- assert(false && "Unhandled Linkage Type.");
case GlobalValue::ExternalLinkage:
return LLVMExternalLinkage;
case GlobalValue::AvailableExternallyLinkage:
@@ -1076,16 +1091,13 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMCommonLinkage;
}
- // Should never get here.
- return static_cast<LLVMLinkage>(0);
+ llvm_unreachable("Invalid GlobalValue linkage!");
}
void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
GlobalValue *GV = unwrap<GlobalValue>(Global);
switch (Linkage) {
- default:
- assert(false && "Unhandled Linkage Type.");
case LLVMExternalLinkage:
GV->setLinkage(GlobalValue::ExternalLinkage);
break;
@@ -1337,14 +1349,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
+ const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA));
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA));
Func->setAttributes(PALnew);
}
@@ -1352,7 +1364,7 @@ LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
Attributes attr = PAL.getFnAttributes();
- return (LLVMAttribute)attr;
+ return (LLVMAttribute)attr.Raw();
}
/*--.. Operations on parameters ............................................--*/
@@ -1414,18 +1426,18 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
}
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->addAttr(PA);
+ unwrap<Argument>(Arg)->addAttr(Attributes(PA));
}
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->removeAttr(PA);
+ unwrap<Argument>(Arg)->removeAttr(Attributes(PA));
}
LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
Attributes attr = A->getParent()->getAttributes().getParamAttributes(
A->getArgNo()+1);
- return (LLVMAttribute)attr;
+ return (LLVMAttribute)attr.Raw();
}
@@ -1603,10 +1615,9 @@ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
Value *V = unwrap(Instr);
if (CallInst *CI = dyn_cast<CallInst>(V))
return CI->getCallingConv();
- else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(V))
return II->getCallingConv();
llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
- return 0;
}
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
@@ -1622,14 +1633,14 @@ void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
Call.setAttributes(
- Call.getAttributes().addAttr(index, PA));
+ Call.getAttributes().addAttr(index, Attributes(PA)));
}
void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
Call.setAttributes(
- Call.getAttributes().removeAttr(index, PA));
+ Call.getAttributes().removeAttr(index, Attributes(PA)));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
@@ -2055,6 +2066,20 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
}
+LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->isVolatile();
+ return cast<StoreInst>(P)->isVolatile();
+}
+
+void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setVolatile(isVolatile);
+ return cast<StoreInst>(P)->setVolatile(isVolatile);
+}
+
/*--.. Casts ...............................................................--*/
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
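
The Core.cpp changes add C API coverage for the half type, named-metadata operands, and load/store volatility. A minimal sketch of calling the new volatility and half-type entry points, assuming an LLVMValueRef that wraps a load or store built elsewhere; markVolatile is an illustrative name:

  #include "llvm-c/Core.h"

  // Illustrative only: exercise two of the C API additions above.
  static void markVolatile(LLVMContextRef C, LLVMValueRef MemAccessInst) {
    LLVMTypeRef Half = LLVMHalfTypeInContext(C);  // new half-precision type
    (void)Half;
    if (!LLVMGetVolatile(MemAccessInst))
      LLVMSetVolatile(MemAccessInst, 1);
  }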
diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp
deleted file mode 100644
index d1275ff58caa..000000000000
--- a/lib/VMCore/DebugInfoProbe.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements DebugInfoProbe. This probe can be used by a pass
-// manager to analyze how optimizer is treating debugging information.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "debuginfoprobe"
-#include "llvm/DebugInfoProbe.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringRef.h"
-#include <set>
-#include <string>
-
-using namespace llvm;
-
-static cl::opt<bool>
-EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden,
- cl::desc("Enable debug info probe"));
-
-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeImpl - This class implements a interface to monitor
-// how an optimization pass is preserving debugging information.
-
-namespace llvm {
-
- class DebugInfoProbeImpl {
- public:
- DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {}
- void initialize(StringRef PName, Function &F);
- void finalize(Function &F);
- void report();
- private:
- unsigned NumDbgLineLost, NumDbgValueLost;
- std::string PassName;
- Function *TheFn;
- std::set<MDNode *> DbgVariables;
- std::set<Instruction *> MissingDebugLoc;
- };
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeImpl
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) {
- if (!EnableDebugInfoProbe) return;
- PassName = PName;
-
- DbgVariables.clear();
- MissingDebugLoc.clear();
- TheFn = &F;
-
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown())
- MissingDebugLoc.insert(BI);
- if (!isa<DbgInfoIntrinsic>(BI)) continue;
- Value *Addr = NULL;
- MDNode *Node = NULL;
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- Addr = DDI->getAddress();
- Node = DDI->getVariable();
- } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- Addr = DVI->getValue();
- Node = DVI->getVariable();
- }
- if (Addr)
- DbgVariables.insert(Node);
- }
-}
-
-/// report - Report findings. This should be invoked after finalize.
-void DebugInfoProbeImpl::report() {
- if (!EnableDebugInfoProbe) return;
- if (NumDbgLineLost || NumDbgValueLost) {
- raw_ostream *OutStream = CreateInfoOutputFile();
- if (NumDbgLineLost)
- *OutStream << NumDbgLineLost
- << "\t times line number info lost by "
- << PassName << "\n";
- if (NumDbgValueLost)
- *OutStream << NumDbgValueLost
- << "\t times variable info lost by "
- << PassName << "\n";
- delete OutStream;
- }
- NumDbgLineLost = 0;
- NumDbgValueLost = 0;
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbeImpl::finalize(Function &F) {
- if (!EnableDebugInfoProbe) return;
- assert (TheFn == &F && "Invalid function to measure!");
-
- std::set<MDNode *>DbgVariables2;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown() &&
- MissingDebugLoc.count(BI) == 0) {
- ++NumDbgLineLost;
- DEBUG(dbgs() << "DebugInfoProbe (" << PassName << "): --- ");
- DEBUG(BI->print(dbgs()));
- DEBUG(dbgs() << "\n");
- }
- if (!isa<DbgInfoIntrinsic>(BI)) continue;
- Value *Addr = NULL;
- MDNode *Node = NULL;
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- Addr = DDI->getAddress();
- Node = DDI->getVariable();
- } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- Addr = DVI->getValue();
- Node = DVI->getVariable();
- }
- if (Addr)
- DbgVariables2.insert(Node);
- }
-
- for (std::set<MDNode *>::iterator I = DbgVariables.begin(),
- E = DbgVariables.end(); I != E; ++I) {
- if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) {
- DEBUG(dbgs()
- << "DebugInfoProbe("
- << PassName
- << "): Losing dbg info for variable: ";
- if (MDString *MDS = dyn_cast_or_null<MDString>(
- (*I)->getOperand(2)))
- dbgs() << MDS->getString();
- else
- dbgs() << "...";
- dbgs() << "\n");
- ++NumDbgValueLost;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-DebugInfoProbe::DebugInfoProbe() {
- pImpl = new DebugInfoProbeImpl();
-}
-
-DebugInfoProbe::~DebugInfoProbe() {
- delete pImpl;
-}
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbe::initialize(StringRef PName, Function &F) {
- pImpl->initialize(PName, F);
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbe::finalize(Function &F) {
- pImpl->finalize(F);
-}
-
-/// report - Report findings. This should be invoked after finalize.
-void DebugInfoProbe::report() {
- pImpl->report();
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeInfo
-
-/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
-/// them.
-DebugInfoProbeInfo::~DebugInfoProbeInfo() {
- if (!EnableDebugInfoProbe) return;
- for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(),
- E = Probes.end(); I != E; ++I) {
- I->second->report();
- delete I->second;
- }
- }
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbeInfo::initialize(Pass *P, Function &F) {
- if (!EnableDebugInfoProbe) return;
- if (P->getAsPMDataManager())
- return;
-
- StringMapEntry<DebugInfoProbe *> &Entry =
- Probes.GetOrCreateValue(P->getPassName());
- DebugInfoProbe *&Probe = Entry.getValue();
- if (!Probe)
- Probe = new DebugInfoProbe();
- Probe->initialize(P->getPassName(), F);
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbeInfo::finalize(Pass *P, Function &F) {
- if (!EnableDebugInfoProbe) return;
- if (P->getAsPMDataManager())
- return;
- StringMapEntry<DebugInfoProbe *> &Entry =
- Probes.GetOrCreateValue(P->getPassName());
- DebugInfoProbe *&Probe = Entry.getValue();
- assert (Probe && "DebugInfoProbe is not initialized!");
- Probe->finalize(F);
-}
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index 328244f80673..9013d28bb67d 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -173,10 +173,7 @@ DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() {
}
unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
- FoldingSetNodeID ID;
- ID.AddInteger(Key.LineCol);
- ID.AddInteger(Key.ScopeIdx);
- return ID.ComputeHash();
+ return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx));
}
bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) {
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 08b845ef9d6b..219e6315cf4e 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -80,27 +80,187 @@ void DominatorTree::print(raw_ostream &OS, const Module *) const {
DT->print(OS);
}
-// dominates - Return true if A dominates a use in B. This performs the
-// special checks necessary if A and B are in the same basic block.
-bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
- const BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
-
- // If A is an invoke instruction, its value is only available in this normal
- // successor block.
- if (const InvokeInst *II = dyn_cast<InvokeInst>(A))
- BBA = II->getNormalDest();
-
- if (BBA != BBB) return dominates(BBA, BBB);
-
- // It is not possible to determine dominance between two PHI nodes
- // based on their ordering.
- if (isa<PHINode>(A) && isa<PHINode>(B))
+// dominates - Return true if Def dominates a use in User. This performs
+// the special checks necessary if Def and User are in the same basic block.
+// Note that Def doesn't dominate a use in Def itself!
+bool DominatorTree::dominates(const Instruction *Def,
+ const Instruction *User) const {
+ const BasicBlock *UseBB = User->getParent();
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // An instruction doesn't dominate a use in itself.
+ if (Def == User)
return false;
-
- // Loop through the basic block until we find A or B.
- BasicBlock::const_iterator I = BBA->begin();
- for (; &*I != A && &*I != B; ++I)
+
+ // The value defined by an invoke dominates an instruction only if
+ // it dominates every instruction in UseBB.
+ // A PHI is dominated only if the instruction dominates every possible use
+ // in the UseBB.
+ if (isa<InvokeInst>(Def) || isa<PHINode>(User))
+ return dominates(Def, UseBB);
+
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != User; ++I)
/*empty*/;
-
- return &*I == A;
+
+ return &*I == Def;
+}
+
+// true if Def would dominate a use in any instruction in UseBB.
+// note that dominates(Def, Def->getParent()) is false.
+bool DominatorTree::dominates(const Instruction *Def,
+ const BasicBlock *UseBB) const {
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if DefBB == UseBB.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ if (DefBB == UseBB)
+ return false;
+
+ const InvokeInst *II = dyn_cast<InvokeInst>(Def);
+ if (!II)
+ return dominates(DefBB, UseBB);
+
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+ if (!dominates(NormalDest, UseBB))
+ return false;
+
+ // Simple case: if the normal destination has a single predecessor, the
+ // fact that it dominates the use block implies that we also do.
+ if (NormalDest->getSinglePredecessor())
+ return true;
+
+ // The normal edge from the invoke is critical. Conceptually, what we would
+ // like to do is split it and check if the new block dominates the use.
+ // With X being the new block, the graph would look like:
+ //
+  //      DefBB
+  //        /\       .  .
+  //       /  \      .  .
+  //      /    \     .  .
+  //     /      \    |  |
+  //    A        X   B  C
+  //    |         \  | /
+  //    .           \|/
+  //    .        NormalDest
+  //    .
+ //
+ // Given the definition of dominance, NormalDest is dominated by X iff X
+ // dominates all of NormalDest's predecessors (X, B, C in the example). X
+ // trivially dominates itself, so we only have to find if it dominates the
+ // other predecessors. Since the only way out of X is via NormalDest, X can
+ // only properly dominate a node if NormalDest dominates that node too.
+ for (pred_iterator PI = pred_begin(NormalDest),
+ E = pred_end(NormalDest); PI != E; ++PI) {
+ const BasicBlock *BB = *PI;
+ if (BB == DefBB)
+ continue;
+
+ if (!DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!dominates(NormalDest, BB))
+ return false;
+ }
+ return true;
+}
+
+bool DominatorTree::dominates(const Instruction *Def,
+ const Use &U) const {
+ Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
+
+ // Instructions do not dominate non-instructions.
+ if (!UserInst)
+ return false;
+
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Determine the block in which the use happens. PHI nodes use
+ // their operands on edges; simulate this by thinking of the use
+ // happening at the end of the predecessor block.
+ const BasicBlock *UseBB;
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // Invoke instructions define their return values on the edges
+ // to their normal successors, so we have to handle them specially.
+ // Among other things, this means they don't dominate anything in
+ // their own block, except possibly a phi, so we don't need to
+ // walk the block in any case.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
+ // A PHI in the normal successor using the invoke's return value is
+ // dominated by the invoke's return value.
+ if (isa<PHINode>(UserInst) &&
+ UserInst->getParent() == II->getNormalDest() &&
+ cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB)
+ return true;
+
+ // Otherwise use the instruction-dominates-block query, which
+ // handles the crazy case of an invoke with a critical edge
+ // properly.
+ return dominates(Def, UseBB);
+ }
+
+ // If the def and use are in different blocks, do a simple CFG dominator
+ // tree query.
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Ok, def and use are in the same block. If the def is an invoke, it
+ // doesn't dominate anything in the block. If it's a PHI, it dominates
+ // everything in the block.
+ if (isa<PHINode>(UserInst))
+ return true;
+
+ // Otherwise, just loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != UserInst; ++I)
+ /*empty*/;
+
+ return &*I != UserInst;
+}
+
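// Illustrative sketch (not part of the patch): the Use-based overload treats a
// PHI operand as used at the end of its incoming block, so a definition in
// that predecessor dominates the use even though the PHI itself sits in the
// successor. 'PN' (a PHINode*), 'Def' (an Instruction*) and 'DT' are assumed
// to be supplied by the caller.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  Use &U = PN->getOperandUse(i);
  if (U.get() == Def && DT.dominates(Def, U)) {
    // Def reaches PN along the i-th incoming edge.
  }
}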
+bool DominatorTree::isReachableFromEntry(const Use &U) const {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+
+ // ConstantExprs aren't really reachable from the entry block, but they
+ // don't need to be treated like unreachable code either.
+ if (!I) return true;
+
+ // PHI nodes use their operands on their incoming edges.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ return isReachableFromEntry(PN->getIncomingBlock(U));
+
+ // Everything else uses their operands in their own block.
+ return isReachableFromEntry(I->getParent());
}
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 1215e6a57ced..af6344ef6168 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -39,6 +39,8 @@ template class llvm::SymbolTableListTraits<BasicBlock, Function>;
// Argument Implementation
//===----------------------------------------------------------------------===//
+void Argument::anchor() { }
+
Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
: Value(Ty, Value::ArgumentVal) {
Parent = 0;
@@ -359,7 +361,7 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
FunctionType *Intrinsic::getType(LLVMContext &Context,
ID id, ArrayRef<Type*> Tys) {
Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
@@ -370,13 +372,9 @@ FunctionType *Intrinsic::getType(LLVMContext &Context,
}
bool Intrinsic::isOverloaded(ID id) {
- static const bool OTable[] = {
- false,
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- return OTable[id];
}
/// This defines the "Intrinsic::getAttributes(ID id)" method.
@@ -402,6 +400,7 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
bool Function::hasAddressTaken(const User* *PutOffender) const {
for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const User *U = *I;
+ // FIXME: Check for blockaddress, which does not take the address.
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
return PutOffender ? (*PutOffender = U, true) : true;
ImmutableCallSite CS(cast<Instruction>(U));
@@ -411,41 +410,30 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
return false;
}
+bool Function::isDefTriviallyDead() const {
+ // Check the linkage
+ if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
+ !hasAvailableExternallyLinkage())
+ return false;
+
+ // Check if the function is used by anything other than a blockaddress.
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ if (!isa<BlockAddress>(*I))
+ return false;
+
+ return true;
+}
+
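// Illustrative sketch (not part of the patch): collecting candidates with the
// new query. 'M' is a Module being inspected; the vector name is made up here.
SmallVector<const Function *, 8> DeadDefs;
for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F)
  if (!F->isDeclaration() && F->isDefTriviallyDead())
    DeadDefs.push_back(&*F); // definitions whose body is no longer needed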
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
-///
-/// FIXME: Remove after <rdar://problem/8031714> is fixed.
-/// FIXME: Is the above FIXME valid?
bool Function::callsFunctionThatReturnsTwice() const {
- static const char *const ReturnsTwiceFns[] = {
- "_setjmp",
- "setjmp",
- "sigsetjmp",
- "setjmp_syscall",
- "savectx",
- "qsetjmp",
- "vfork",
- "getcontext"
- };
-
- for (const_inst_iterator I = inst_begin(this), E = inst_end(this); I != E;
- ++I) {
+ for (const_inst_iterator
+ I = inst_begin(this), E = inst_end(this); I != E; ++I) {
const CallInst* callInst = dyn_cast<CallInst>(&*I);
if (!callInst)
continue;
if (callInst->canReturnTwice())
return true;
-
- // check for known function names.
- // FIXME: move this to clang.
- Function *F = callInst->getCalledFunction();
- if (!F)
- continue;
- StringRef Name = F->getName();
- for (unsigned J = 0, e = array_lengthof(ReturnsTwiceFns); J != e; ++J) {
- if (Name == ReturnsTwiceFns[J])
- return true;
- }
}
return false;
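// Illustrative sketch (not part of the patch): the hard-coded name table goes
// away because the query now keys off the call itself. A front end marks such
// calls (e.g. setjmp) with the returns_twice attribute, and canReturnTwice()
// reports it. 'CI' is a CallInst* assumed from context.
if (CI->canReturnTwice()) {
  // Conservatively treat this call like setjmp.
}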
diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp
index fc7f96fccaaa..595c45235995 100644
--- a/lib/VMCore/GCOV.cpp
+++ b/lib/VMCore/GCOV.cpp
@@ -107,7 +107,7 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) {
for (unsigned i = 0, e = Count; i != e; ++i) {
Blocks[i]->addCount(Buff.readInt64());
}
- return true;;
+ return true;
}
LineNumber = Buff.readInt();
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 5114e2d498c5..b45923489af4 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -24,10 +24,10 @@ using namespace llvm;
/// specified. If Name is specified, it is the name of the global variable
/// created.
Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
- Constant *StrConstant = ConstantArray::get(Context, Str, true);
+ Constant *StrConstant = ConstantDataArray::getString(Context, Str);
Module &M = *BB->getParent()->getParent();
GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
- true, GlobalValue::InternalLinkage,
+ true, GlobalValue::PrivateLinkage,
StrConstant, "", 0, false);
GV->setName(Name);
GV->setUnnamedAddr(true);
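// Illustrative sketch (not part of the patch): a typical front-end use of the
// updated helper; the backing global is now a ConstantDataArray with private
// linkage. 'Builder' is an IRBuilder<> already positioned inside a function,
// and the name "greeting" is a made-up example.
Value *Str = Builder.CreateGlobalString("hello world", "greeting");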
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 73191c19658c..5449714280d3 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -102,7 +102,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case IndirectBr: return "indirectbr";
case Invoke: return "invoke";
case Resume: return "resume";
- case Unwind: return "unwind";
case Unreachable: return "unreachable";
// Standard binary operators...
@@ -166,8 +165,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
default: return "<Invalid operator> ";
}
-
- return 0;
}
/// isIdenticalTo - Return true if the specified instruction is exactly
@@ -391,59 +388,6 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-bool Instruction::isSafeToSpeculativelyExecute() const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(getOperand(i)))
- if (C->canTrap())
- return false;
-
- switch (getOpcode()) {
- default:
- return true;
- case UDiv:
- case URem: {
- // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue();
- }
- case SDiv:
- case SRem: {
- // x / y is undefined if y == 0, and might be undefined if y == -1,
- // but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue() && !Op->isAllOnesValue();
- }
- case Load: {
- const LoadInst *LI = cast<LoadInst>(this);
- if (!LI->isUnordered())
- return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
- }
- case Call:
- return false; // The called function could have undefined behavior or
- // side-effects.
- // FIXME: We should special-case some intrinsics (bswap,
- // overflow-checking arithmetic, etc.)
- case VAArg:
- case Alloca:
- case Invoke:
- case PHI:
- case Store:
- case Ret:
- case Br:
- case IndirectBr:
- case Switch:
- case Unwind:
- case Unreachable:
- case Fence:
- case LandingPad:
- case AtomicRMW:
- case AtomicCmpXchg:
- case Resume:
- return false; // Misc instructions which have effects
- }
-}
-
Instruction *Instruction::clone() const {
Instruction *New = clone_impl();
New->SubclassOptionalData = SubclassOptionalData;
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index b3a720527a8e..8db6ac9a33f4 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -625,40 +625,12 @@ void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
llvm_unreachable("ReturnInst has no successors!");
- return 0;
}
ReturnInst::~ReturnInst() {
}
//===----------------------------------------------------------------------===//
-// UnwindInst Implementation
-//===----------------------------------------------------------------------===//
-
-UnwindInst::UnwindInst(LLVMContext &Context, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
- 0, 0, InsertBefore) {
-}
-UnwindInst::UnwindInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
- 0, 0, InsertAtEnd) {
-}
-
-
-unsigned UnwindInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-
-void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("UnwindInst has no successors!");
-}
-
-BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("UnwindInst has no successors!");
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
// ResumeInst Implementation
//===----------------------------------------------------------------------===//
@@ -690,7 +662,6 @@ void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
llvm_unreachable("ResumeInst has no successors!");
- return 0;
}
//===----------------------------------------------------------------------===//
@@ -712,12 +683,11 @@ unsigned UnreachableInst::getNumSuccessorsV() const {
}
void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("UnwindInst has no successors!");
+ llvm_unreachable("UnreachableInst has no successors!");
}
BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("UnwindInst has no successors!");
- return 0;
+ llvm_unreachable("UnreachableInst has no successors!");
}
//===----------------------------------------------------------------------===//
@@ -1359,6 +1329,15 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
///
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ if (Ptr->isVectorTy()) {
+ assert(IdxList.size() == 1 &&
+ "GEP with vector pointers must have a single index");
+ PointerType *PTy = dyn_cast<PointerType>(
+ cast<VectorType>(Ptr)->getElementType());
+ assert(PTy && "GEP with invalid vector pointer found");
+ return PTy->getElementType();
+ }
+
PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
@@ -1366,7 +1345,7 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
-
+
// If there is at least one index, the top level type must be sized, otherwise
// it cannot be 'stepped over'.
if (!Agg->isSized())
@@ -1396,6 +1375,18 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
return getIndexedTypeInternal(Ptr, IdxList);
}
+unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) {
+ Type *Ty = Ptr->getType();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ Ty = VTy->getElementType();
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ return PTy->getAddressSpace();
+
+ llvm_unreachable("Invalid GEP pointer type");
+}
+
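// Illustrative sketch (not part of the patch): the indexed-type query for a
// GEP whose pointer operand is a vector of pointers. With the single index the
// new code requires, the result is the pointee type of each lane. 'Ctx' is an
// LLVMContext assumed from context.
Type *I32 = Type::getInt32Ty(Ctx);
Type *VecOfPtrs = VectorType::get(I32->getPointerTo(), 4);
Type *Elt = GetElementPtrInst::getIndexedType(VecOfPtrs, uint64_t(0));
assert(Elt == I32 && "vector GEP steps to the lane pointee type");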
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
@@ -1558,46 +1549,84 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const Value *Mask) {
+ // V1 and V2 must be vectors of the same type.
if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
+ // Mask must be vector of i32.
VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
return false;
// Check to see if Mask is valid.
+ if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
+ return true;
+
if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
- VectorType *VTy = cast<VectorType>(V1->getType());
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
- if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
- if (CI->uge(VTy->getNumElements()*2))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(V1Size*2))
return false;
} else if (!isa<UndefValue>(MV->getOperand(i))) {
return false;
}
}
+ return true;
}
- else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
- return false;
- return true;
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
+ if (CDS->getElementAsInteger(i) >= V1Size*2)
+ return false;
+ return true;
+ }
+
+ // The bitcode reader can create a placeholder for a forward reference
+ // used as the shuffle mask. When this occurs, the shuffle mask will
+ // fall into this case and fail. To avoid this error, do this bit of
+ // ugliness to allow such a mask to pass.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
+ if (CE->getOpcode() == Instruction::UserOp1)
+ return true;
+
+ return false;
}
/// getMaskValue - Return the index from the shuffle mask for the specified
/// output result. This is either -1 if the element is undef or a number less
/// than 2*numelements.
-int ShuffleVectorInst::getMaskValue(unsigned i) const {
- const Constant *Mask = cast<Constant>(getOperand(2));
- if (isa<UndefValue>(Mask)) return -1;
- if (isa<ConstantAggregateZero>(Mask)) return 0;
- const ConstantVector *MaskCV = cast<ConstantVector>(Mask);
- assert(i < MaskCV->getNumOperands() && "Index out of range");
-
- if (isa<UndefValue>(MaskCV->getOperand(i)))
+int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
+ assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+ if (ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(Mask))
+ return CDS->getElementAsInteger(i);
+ Constant *C = Mask->getAggregateElement(i);
+ if (isa<UndefValue>(C))
return -1;
- return cast<ConstantInt>(MaskCV->getOperand(i))->getZExtValue();
+ return cast<ConstantInt>(C)->getZExtValue();
+}
+
+/// getShuffleMask - Return the full mask for this instruction, where each
+/// element is the element number and undef's are returned as -1.
+void ShuffleVectorInst::getShuffleMask(Constant *Mask,
+ SmallVectorImpl<int> &Result) {
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+
+ if (ConstantDataSequential *CDS=dyn_cast<ConstantDataSequential>(Mask)) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(CDS->getElementAsInteger(i));
+ return;
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Mask->getAggregateElement(i);
+ Result.push_back(isa<UndefValue>(C) ? -1 :
+ cast<ConstantInt>(C)->getZExtValue());
+ }
}
+
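// Illustrative sketch (not part of the patch): decoding a shuffle mask through
// the new static helpers, which accept either a ConstantVector or the new
// ConstantDataSequential form. 'SVI' is a ShuffleVectorInst* assumed to be
// visited by the caller; operand 2 is its mask.
SmallVector<int, 16> Mask;
ShuffleVectorInst::getShuffleMask(cast<Constant>(SVI->getOperand(2)), Mask);
for (unsigned i = 0, e = Mask.size(); i != e; ++i)
  if (Mask[i] == -1) {
    // Lane i of the result is undef.
  }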
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -1848,46 +1877,27 @@ BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub,
- zero, Op,
+ return new BinaryOperator(Instruction::FSub, zero, Op,
Op->getType(), Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub,
- zero, Op,
+ return new BinaryOperator(Instruction::FSub, zero, Op,
Op->getType(), Name, InsertAtEnd);
}
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Constant *C;
- if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
- C = Constant::getAllOnesValue(PTy->getElementType());
- C = ConstantVector::get(
- std::vector<Constant*>(PTy->getNumElements(), C));
- } else {
- C = Constant::getAllOnesValue(Op->getType());
- }
-
+ Constant *C = Constant::getAllOnesValue(Op->getType());
return new BinaryOperator(Instruction::Xor, Op, C,
Op->getType(), Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Constant *AllOnes;
- if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
- // Create a vector of all ones values.
- Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
- AllOnes = ConstantVector::get(
- std::vector<Constant*>(PTy->getNumElements(), Elt));
- } else {
- AllOnes = Constant::getAllOnesValue(Op->getType());
- }
-
+ Constant *AllOnes = Constant::getAllOnesValue(Op->getType());
return new BinaryOperator(Instruction::Xor, Op, AllOnes,
Op->getType(), Name, InsertAtEnd);
}
@@ -1895,10 +1905,8 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
// isConstantAllOnes - Helper function for several functions below
static inline bool isConstantAllOnes(const Value *V) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return CI->isAllOnesValue();
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
- return CV->isAllOnesValue();
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isAllOnesValue();
return false;
}
@@ -1998,6 +2006,8 @@ bool BinaryOperator::isExact() const {
// CastInst Class
//===----------------------------------------------------------------------===//
+void CastInst::anchor() {}
+
// Just determine if this cast only deals with integral->integral conversion.
bool CastInst::isIntegerCast() const {
switch (getOpcode()) {
@@ -2042,8 +2052,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
Type *DestTy,
Type *IntPtrTy) {
switch (Opcode) {
- default:
- assert(0 && "Invalid CastOp");
+ default: llvm_unreachable("Invalid CastOp");
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -2236,13 +2245,10 @@ unsigned CastInst::isEliminableCastPair(
case 99:
// cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
- assert(0 && "Invalid Cast Combination");
- return 0;
+ llvm_unreachable("Invalid Cast Combination");
default:
- assert(0 && "Error in CastResults table!!!");
- return 0;
+ llvm_unreachable("Error in CastResults table!!!");
}
- return 0;
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
@@ -2262,10 +2268,8 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
- default:
- assert(0 && "Invalid opcode provided");
+ default: llvm_unreachable("Invalid opcode provided");
}
- return 0;
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
@@ -2285,10 +2289,8 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
- default:
- assert(0 && "Invalid opcode provided");
+ default: llvm_unreachable("Invalid opcode provided");
}
- return 0;
}
CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
@@ -2557,9 +2559,8 @@ CastInst::getCastOpcode(
assert(DestBits == SrcBits &&
"Casting vector to floating point of different width");
return BitCast; // same size, no-op cast
- } else {
- llvm_unreachable("Casting pointer or non-first class to float");
}
+ llvm_unreachable("Casting pointer or non-first class to float");
} else if (DestTy->isVectorTy()) {
assert(DestBits == SrcBits &&
"Illegal cast to vector (wrong type or size)");
@@ -2569,24 +2570,16 @@ CastInst::getCastOpcode(
return BitCast; // ptr -> ptr
} else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
- } else {
- assert(0 && "Casting pointer to other than pointer or int");
}
+ llvm_unreachable("Casting pointer to other than pointer or int");
} else if (DestTy->isX86_MMXTy()) {
if (SrcTy->isVectorTy()) {
assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
return BitCast; // 64-bit vector to MMX
- } else {
- assert(0 && "Illegal cast to X86_MMX");
}
- } else {
- assert(0 && "Casting to type that is not first-class");
+ llvm_unreachable("Illegal cast to X86_MMX");
}
-
- // If we fall through to here we probably hit an assertion cast above
- // and assertions are not turned on. Anything we return is an error, so
- // BitCast is as good a choice as any.
- return BitCast;
+ llvm_unreachable("Casting to type that is not first-class");
}
//===----------------------------------------------------------------------===//
@@ -2645,9 +2638,21 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength;
case Instruction::PtrToInt:
- return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
@@ -2890,7 +2895,7 @@ bool CmpInst::isEquality() const {
CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown cmp predicate!");
+ default: llvm_unreachable("Unknown cmp predicate!");
case ICMP_EQ: return ICMP_NE;
case ICMP_NE: return ICMP_EQ;
case ICMP_UGT: return ICMP_ULE;
@@ -2923,7 +2928,7 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown icmp predicate!");
+ default: llvm_unreachable("Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
return pred;
@@ -2936,7 +2941,7 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown icmp predicate!");
+ default: llvm_unreachable("Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
return pred;
@@ -3012,7 +3017,7 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown cmp predicate!");
+ default: llvm_unreachable("Unknown cmp predicate!");
case ICMP_EQ: case ICMP_NE:
return pred;
case ICMP_SGT: return ICMP_SLT;
@@ -3147,31 +3152,32 @@ SwitchInst::~SwitchInst() {
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ unsigned NewCaseIdx = getNumCases();
unsigned OpNo = NumOperands;
if (OpNo+2 > ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
NumOperands = OpNo+2;
- OperandList[OpNo] = OnVal;
- OperandList[OpNo+1] = Dest;
+ CaseIt Case(this, NewCaseIdx);
+ Case.setValue(OnVal);
+ Case.setSuccessor(Dest);
}
-/// removeCase - This method removes the specified successor from the switch
-/// instruction. Note that this cannot be used to remove the default
-/// destination (successor #0).
-///
-void SwitchInst::removeCase(unsigned idx) {
- assert(idx != 0 && "Cannot remove the default case!");
- assert(idx*2 < getNumOperands() && "Successor index out of range!!!");
+/// removeCase - This method removes the specified case and its successor
+/// from the switch instruction.
+void SwitchInst::removeCase(CaseIt i) {
+ unsigned idx = i.getCaseIndex();
+
+ assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
unsigned NumOps = getNumOperands();
Use *OL = OperandList;
// Overwrite this case with the end of the list.
- if ((idx + 1) * 2 != NumOps) {
- OL[idx * 2] = OL[NumOps - 2];
- OL[idx * 2 + 1] = OL[NumOps - 1];
+ if (2 + (idx + 1) * 2 != NumOps) {
+ OL[2 + idx * 2] = OL[NumOps - 2];
+ OL[2 + idx * 2 + 1] = OL[NumOps - 1];
}
// Nuke the last value.
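// Illustrative sketch (not part of the patch): erasing a case through the
// CaseIt interface instead of a raw successor index. 'SI' is a SwitchInst*
// and 'DeadVal' a ConstantInt* assumed from the caller; the loop stops after
// the removal because removeCase invalidates iterators past the erased case.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
  if (i.getCaseValue() == DeadVal) {
    SI->removeCase(i);
    break;
  }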
@@ -3438,15 +3444,11 @@ ExtractElementInst *ExtractElementInst::clone_impl() const {
}
InsertElementInst *InsertElementInst::clone_impl() const {
- return InsertElementInst::Create(getOperand(0),
- getOperand(1),
- getOperand(2));
+ return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
}
ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
- return new ShuffleVectorInst(getOperand(0),
- getOperand(1),
- getOperand(2));
+ return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
}
PHINode *PHINode::clone_impl() const {
@@ -3482,11 +3484,6 @@ ResumeInst *ResumeInst::clone_impl() const {
return new(1) ResumeInst(*this);
}
-UnwindInst *UnwindInst::clone_impl() const {
- LLVMContext &Context = getContext();
- return new UnwindInst(Context);
-}
-
UnreachableInst *UnreachableInst::clone_impl() const {
LLVMContext &Context = getContext();
return new UnreachableInst(Context);
diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt
new file mode 100644
index 000000000000..bca8b2c97e95
--- /dev/null
+++ b/lib/VMCore/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/VMCore/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Core
+parent = Libraries
+required_libraries = Support
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index ebd1e0aa1b0f..68c56212bc6c 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -43,6 +43,16 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
// Create the 'prof' metadata kind.
unsigned ProfID = getMDKindID("prof");
assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+
+ // Create the 'fpaccuracy' metadata kind.
+ unsigned FPAccuracyID = getMDKindID("fpaccuracy");
+ assert(FPAccuracyID == MD_fpaccuracy && "fpaccuracy kind id drifted");
+ (void)FPAccuracyID;
+
+ // Create the 'range' metadata kind.
+ unsigned RangeID = getMDKindID("range");
+ assert(RangeID == MD_range && "range kind id drifted");
+ (void)RangeID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
@@ -78,11 +88,11 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const {
return pImpl->InlineAsmDiagContext;
}
-void LLVMContext::emitError(StringRef ErrorStr) {
+void LLVMContext::emitError(const Twine &ErrorStr) {
emitError(0U, ErrorStr);
}
-void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
+void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
unsigned LocCookie = 0;
if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
if (SrcLoc->getNumOperands() != 0)
@@ -92,7 +102,7 @@ void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
return emitError(LocCookie, ErrorStr);
}
-void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
+void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
// If there is no error handler installed, just print the error and exit.
if (pImpl->InlineAsmDiagHandler == 0) {
errs() << "error: " << ErrorStr << "\n";
@@ -100,7 +110,7 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
}
// If we do have an error handler, we can report the error and keep going.
- SMDiagnostic Diag("", "error: " + ErrorStr.str());
+ SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str());
pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 504b37267f70..6279bb823dbf 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -21,6 +21,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: TheTrueVal(0), TheFalseVal(0),
VoidTy(C, Type::VoidTyID),
LabelTy(C, Type::LabelTyID),
+ HalfTy(C, Type::HalfTyID),
FloatTy(C, Type::FloatTyID),
DoubleTy(C, Type::DoubleTyID),
MetadataTy(C, Type::MetadataTyID),
@@ -47,6 +48,16 @@ struct DropReferences {
P.second->dropAllReferences();
}
};
+
+// Temporary - drops pair.first instead of second.
+struct DropFirst {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.first->dropAllReferences();
+ }
+};
}
LLVMContextImpl::~LLVMContextImpl() {
@@ -57,25 +68,32 @@ LLVMContextImpl::~LLVMContextImpl() {
std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
DeleteContainerPointers(Modules);
+ // Free the constants. This is important to do here to ensure that they are
+ // freed before the LeakDetector is torn down.
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
DropReferences());
std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
- DropReferences());
+ DropFirst());
std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
- DropReferences());
+ DropFirst());
std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
- DropReferences());
+ DropFirst());
ExprConstants.freeConstants();
ArrayConstants.freeConstants();
StructConstants.freeConstants();
VectorConstants.freeConstants();
- AggZeroConstants.freeConstants();
- NullPtrConstants.freeConstants();
- UndefValueConstants.freeConstants();
+ DeleteContainerSeconds(CAZConstants);
+ DeleteContainerSeconds(CPNConstants);
+ DeleteContainerSeconds(UVConstants);
InlineAsms.freeConstants();
DeleteContainerSeconds(IntConstants);
DeleteContainerSeconds(FPConstants);
+ for (StringMap<ConstantDataSequential*>::iterator I = CDSConstants.begin(),
+ E = CDSConstants.end(); I != E; ++I)
+ delete I->second;
+ CDSConstants.clear();
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
@@ -92,3 +110,24 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy MDStrings.
DeleteContainerSeconds(MDStringCache);
}
+
+// ConstantsContext anchors
+void UnaryConstantExpr::anchor() { }
+
+void BinaryConstantExpr::anchor() { }
+
+void SelectConstantExpr::anchor() { }
+
+void ExtractElementConstantExpr::anchor() { }
+
+void InsertElementConstantExpr::anchor() { }
+
+void ShuffleVectorConstantExpr::anchor() { }
+
+void ExtractValueConstantExpr::anchor() { }
+
+void InsertValueConstantExpr::anchor() { }
+
+void GetElementPtrConstantExpr::anchor() { }
+
+void CompareConstantExpr::anchor() { }
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index a3f68fecbbf6..2252028b1569 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -29,6 +29,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Hashing.h"
#include <vector>
namespace llvm {
@@ -51,12 +52,14 @@ struct DenseMapAPIntKeyInfo {
bool operator!=(const KeyTy& that) const {
return !this->operator==(that);
}
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.type, Key.val);
+ }
};
static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
static unsigned getHashValue(const KeyTy &Key) {
- return DenseMapInfo<void*>::getHashValue(Key.type) ^
- Key.val.getHashValue();
+ return static_cast<unsigned>(hash_value(Key));
}
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
@@ -74,6 +77,9 @@ struct DenseMapAPFloatKeyInfo {
bool operator!=(const KeyTy& that) const {
return !this->operator==(that);
}
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.val);
+ }
};
static inline KeyTy getEmptyKey() {
return KeyTy(APFloat(APFloat::Bogus,1));
@@ -82,13 +88,132 @@ struct DenseMapAPFloatKeyInfo {
return KeyTy(APFloat(APFloat::Bogus,2));
}
static unsigned getHashValue(const KeyTy &Key) {
- return Key.val.getHashValue();
+ return static_cast<unsigned>(hash_value(Key));
}
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
}
};
+struct AnonStructTypeKeyInfo {
+ struct KeyTy {
+ ArrayRef<Type*> ETypes;
+ bool isPacked;
+ KeyTy(const ArrayRef<Type*>& E, bool P) :
+ ETypes(E), isPacked(P) {}
+ KeyTy(const KeyTy& that) :
+ ETypes(that.ETypes), isPacked(that.isPacked) {}
+ KeyTy(const StructType* ST) :
+ ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
+ isPacked(ST->isPacked()) {}
+ bool operator==(const KeyTy& that) const {
+ if (isPacked != that.isPacked)
+ return false;
+ if (ETypes != that.ETypes)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline StructType* getEmptyKey() {
+ return DenseMapInfo<StructType*>::getEmptyKey();
+ }
+ static inline StructType* getTombstoneKey() {
+ return DenseMapInfo<StructType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(hash_combine_range(Key.ETypes.begin(),
+ Key.ETypes.end()),
+ Key.isPacked);
+ }
+ static unsigned getHashValue(const StructType *ST) {
+ return getHashValue(KeyTy(ST));
+ }
+ static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const StructType *LHS, const StructType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct FunctionTypeKeyInfo {
+ struct KeyTy {
+ const Type *ReturnType;
+ ArrayRef<Type*> Params;
+ bool isVarArg;
+ KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
+ ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const KeyTy& that) :
+ ReturnType(that.ReturnType),
+ Params(that.Params),
+ isVarArg(that.isVarArg) {}
+ KeyTy(const FunctionType* FT) :
+ ReturnType(FT->getReturnType()),
+ Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
+ isVarArg(FT->isVarArg()) {}
+ bool operator==(const KeyTy& that) const {
+ if (ReturnType != that.ReturnType)
+ return false;
+ if (isVarArg != that.isVarArg)
+ return false;
+ if (Params != that.Params)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline FunctionType* getEmptyKey() {
+ return DenseMapInfo<FunctionType*>::getEmptyKey();
+ }
+ static inline FunctionType* getTombstoneKey() {
+ return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(Key.ReturnType,
+ hash_combine_range(Key.Params.begin(),
+ Key.Params.end()),
+ Key.isVarArg);
+ }
+ static unsigned getHashValue(const FunctionType *FT) {
+ return getHashValue(KeyTy(FT));
+ }
+ static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
+// shortcut to avoid comparing all operands.
+template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
+ static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
+ // First, check if the cached hashes match. If they don't we can skip the
+ // expensive operand walk.
+ if (X.Hash != IDHash)
+ return false;
+
+ // If they match we have to compare the operands.
+ X.Profile(TempID);
+ return TempID == ID;
+ }
+ static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
+ return X.Hash; // Return cached hash.
+ }
+};
+
/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
class DebugRecVH : public CallbackVH {
@@ -129,7 +254,7 @@ public:
DenseMapAPFloatKeyInfo> FPMapTy;
FPMapTy FPConstants;
- StringMap<MDString*> MDStringCache;
+ StringMap<Value*> MDStringCache;
FoldingSet<MDNode> MDNodeSet;
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
@@ -138,23 +263,23 @@ public:
// on Context destruction.
SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
- ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
+ DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
+ typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- VectorType, ConstantVector> VectorConstantsTy;
+ typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
- ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
- NullPtrConstants;
- ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
+ DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
+
+ DenseMap<Type*, UndefValue*> UVConstants;
+
+ StringMap<ConstantDataSequential*> CDSConstants;
+
DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
@@ -169,7 +294,7 @@ public:
LeakDetectorImpl<Value> LLVMObjects;
// Basic type instances.
- Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+ Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
@@ -180,9 +305,10 @@ public:
DenseMap<unsigned, IntegerType*> IntegerTypes;
- // TODO: Optimize FunctionTypes/AnonStructTypes!
- std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
- std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+ typedef DenseMap<FunctionType*, bool, FunctionTypeKeyInfo> FunctionTypeMap;
+ FunctionTypeMap FunctionTypes;
+ typedef DenseMap<StructType*, bool, AnonStructTypeKeyInfo> StructTypeMap;
+ StructTypeMap AnonStructTypes;
StringMap<StructType*> NamedStructTypes;
unsigned NamedStructTypesUniqueID;
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ace4dc2de271..090b09a4ccd7 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -29,16 +29,19 @@ using namespace llvm;
// MDString implementation.
//
-MDString::MDString(LLVMContext &C, StringRef S)
- : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {}
+void MDString::anchor() { }
+
+MDString::MDString(LLVMContext &C)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
MDString *MDString::get(LLVMContext &Context, StringRef Str) {
LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<MDString *> &Entry =
+ StringMapEntry<Value*> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str);
- MDString *&S = Entry.getValue();
- if (!S) S = new MDString(Context, Entry.getKey());
- return S;
+ Value *&S = Entry.getValue();
+ if (!S) S = new MDString(Context);
+ S->setValueName(&Entry);
+ return cast<MDString>(S);
}
//===----------------------------------------------------------------------===//
@@ -48,14 +51,26 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
// Use CallbackVH to hold MDNode operands.
namespace llvm {
class MDNodeOperand : public CallbackVH {
- MDNode *Parent;
+ MDNode *getParent() {
+ MDNodeOperand *Cur = this;
+
+ while (Cur->getValPtrInt() != 1)
+ --Cur;
+
+ assert(Cur->getValPtrInt() == 1 &&
+ "Couldn't find the beginning of the operand list!");
+ return reinterpret_cast<MDNode*>(Cur) - 1;
+ }
+
public:
- MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
+ MDNodeOperand(Value *V) : CallbackVH(V) {}
~MDNodeOperand() {}
- void set(Value *V) {
- setValPtr(V);
- }
+ void set(Value *V) { this->setValPtr(V); }
+
+ /// setAsFirstOperand - Accessor method to mark the operand as the first in
+ /// the list.
+ void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
virtual void deleted();
virtual void allUsesReplacedWith(Value *NV);
@@ -64,15 +79,13 @@ public:
void MDNodeOperand::deleted() {
- Parent->replaceOperand(this, 0);
+ getParent()->replaceOperand(this, 0);
}
void MDNodeOperand::allUsesReplacedWith(Value *NV) {
- Parent->replaceOperand(this, NV);
+ getParent()->replaceOperand(this, NV);
}
-
-
//===----------------------------------------------------------------------===//
// MDNode implementation.
//
@@ -85,6 +98,11 @@ static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
}
+void MDNode::replaceOperandWith(unsigned i, Value *Val) {
+ MDNodeOperand *Op = getOperandPtr(this, i);
+ replaceOperand(Op, Val);
+}
+
MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
NumOperands = Vals.size();
@@ -95,8 +113,13 @@ MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
// Initialize the operand list, which is co-allocated on the end of the node.
unsigned i = 0;
for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
- Op != E; ++Op, ++i)
- new (Op) MDNodeOperand(Vals[i], this);
+ Op != E; ++Op, ++i) {
+ new (Op) MDNodeOperand(Vals[i]);
+
+ // Mark the first MDNodeOperand as being the first in the list of operands.
+ if (i == 0)
+ Op->setAsFirstOperand(1);
+ }
}
@@ -161,12 +184,13 @@ static const Function *assertLocalFunction(const MDNode *N) {
const Function *MDNode::getFunction() const {
#ifndef NDEBUG
return assertLocalFunction(this);
-#endif
+#else
if (!isFunctionLocal()) return NULL;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (const Function *F = getFunctionForValue(getOperand(i)))
return F;
return NULL;
+#endif
}
// destroy - Delete this node. Only when there are no uses.
@@ -197,11 +221,11 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
ID.AddPointer(Vals[i]);
void *InsertPoint;
- MDNode *N = NULL;
-
- if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (N || !Insert)
return N;
-
+
bool isFunctionLocal = false;
switch (FL) {
case FL_Unknown:
@@ -226,6 +250,9 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+ // Cache the operand hash.
+ N->Hash = ID.ComputeHash();
+
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(N, InsertPoint);
@@ -349,6 +376,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
return;
}
+ // Cache the operand hash.
+ Hash = ID.ComputeHash();
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(this, InsertPoint);
@@ -425,12 +454,12 @@ StringRef NamedMDNode::getName() const {
// Instruction Metadata method implementations.
//
-void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
-MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
@@ -468,9 +497,11 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
}
// Otherwise, we're removing metadata from an instruction.
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
+ assert((hasMetadataHashEntry() ==
+ getContext().pImpl->MetadataStore.count(this)) &&
"HasMetadata bit out of date!");
+ if (!hasMetadataHashEntry())
+ return; // Nothing to remove!
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
// Common case is removing the only entry.
@@ -541,17 +572,15 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
getContext().pImpl->MetadataStore.count(this) &&
"Shouldn't have called this");
const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
+ getContext().pImpl->MetadataStore.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
-
Result.append(Info.begin(), Info.end());
-
+
// Sort the resulting array so it is stable.
if (Result.size() > 1)
array_pod_sort(Result.begin(), Result.end());
}
-
/// clearMetadataHashEntries - Clear all hashtable-based metadata from
/// this instruction.
void Instruction::clearMetadataHashEntries() {
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index c29029bf6c06..e8bc6dbe9706 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -321,11 +321,67 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
return NMD;
}
+/// eraseNamedMetadata - Remove the given NamedMDNode from this module and
+/// delete it.
void Module::eraseNamedMetadata(NamedMDNode *NMD) {
static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
NamedMDList.erase(NMD);
}
+/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+void Module::
+getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
+ const NamedMDNode *ModFlags = getModuleFlagsMetadata();
+ if (!ModFlags) return;
+
+ for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
+ MDNode *Flag = ModFlags->getOperand(i);
+ ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+ MDString *Key = cast<MDString>(Flag->getOperand(1));
+ Value *Val = Flag->getOperand(2);
+ Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+ Key, Val));
+ }
+}
+
+/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. This method returns null if there are no
+/// module-level flags.
+NamedMDNode *Module::getModuleFlagsMetadata() const {
+ return getNamedMetadata("llvm.module.flags");
+}
+
+/// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. If module-level flags aren't found, it
+/// creates the named metadata that contains them.
+NamedMDNode *Module::getOrInsertModuleFlagsMetadata() {
+ return getOrInsertNamedMetadata("llvm.module.flags");
+}
+
+/// addModuleFlag - Add a module-level flag to the module-level flags
+/// metadata. It will create the module-level flags named metadata if it doesn't
+/// already exist.
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ Value *Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Value *Ops[3] = {
+ ConstantInt::get(Int32Ty, Behavior), MDString::get(Context, Key), Val
+ };
+ getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Context, Ops));
+}
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ uint32_t Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ addModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val));
+}
+void Module::addModuleFlag(MDNode *Node) {
+ assert(Node->getNumOperands() == 3 &&
+ "Invalid number of operands for module flag!");
+ assert(isa<ConstantInt>(Node->getOperand(0)) &&
+ isa<MDString>(Node->getOperand(1)) &&
+ "Invalid operand types for module flag!");
+ getOrInsertModuleFlagsMetadata()->addOperand(Node);
+}
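// Illustrative sketch (not part of the patch): writing and reading module-level
// flags with the new API. 'M' is a Module assumed from context, the key
// "example-flag" is made up for illustration, and Warning is assumed to be one
// of the ModFlagBehavior enumerators.
M.addModuleFlag(Module::Warning, "example-flag", 1);

SmallVector<Module::ModuleFlagEntry, 8> Flags;
M.getModuleFlagsMetadata(Flags);
for (unsigned i = 0, e = Flags.size(); i != e; ++i) {
  StringRef Key = Flags[i].Key->getString();
  (void)Key; // Behavior and Val are available on the entry as well.
}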
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 9afc54063321..994a7ffceea5 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -25,11 +25,9 @@ using namespace llvm;
// Pass Implementation
//
-Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
-
// Force out-of-line virtual method.
-Pass::~Pass() {
- delete Resolver;
+Pass::~Pass() {
+ delete Resolver;
}
// Force out-of-line virtual method.
@@ -48,7 +46,7 @@ bool Pass::mustPreserveAnalysisID(char &AID) const {
return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
}
-// dumpPassStructure - Implement the -debug-passes=Structure option
+// dumpPassStructure - Implement the -debug-pass=Structure option
void Pass::dumpPassStructure(unsigned Offset) {
dbgs().indent(Offset*2) << getPassName() << "\n";
}
@@ -71,7 +69,7 @@ void Pass::preparePassManager(PMStack &) {
PassManagerType Pass::getPotentialPassManagerType() const {
// Default implementation.
- return PMT_Unknown;
+ return PMT_Unknown;
}
void Pass::getAnalysisUsage(AnalysisUsage &) const {
@@ -155,9 +153,8 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const {
Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
-
+
llvm_unreachable("BasicBlockPass printing unsupported.");
- return 0;
}
bool BasicBlockPass::doInitialization(Module &) {
@@ -181,7 +178,7 @@ bool BasicBlockPass::doFinalization(Module &) {
}
PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
- return PMT_BasicBlockPassManager;
+ return PMT_BasicBlockPassManager;
}
const PassInfo *Pass::lookupPassInfo(const void *TI) {
@@ -192,6 +189,13 @@ const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
return PassRegistry::getPassRegistry()->getPassInfo(Arg);
}
+Pass *Pass::createPass(AnalysisID ID) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ if (!PI)
+ return NULL;
+ return PI->createPass();
+}
+
Pass *PassInfo::createPass() const {
assert((!isAnalysisGroup() || NormalCtor) &&
"No default implementation found for analysis group!");
@@ -246,7 +250,7 @@ namespace {
typedef AnalysisUsage::VectorType VectorType;
VectorType &CFGOnlyList;
GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
-
+
void passEnumerate(const PassInfo *P) {
if (P->isCFGOnlyPass())
CFGOnlyList.push_back(P->getTypeInfo());
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index ecedb1db66b0..28fbaa667841 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -14,7 +14,6 @@
#include "llvm/PassManagers.h"
#include "llvm/PassManager.h"
-#include "llvm/DebugInfoProbe.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
@@ -26,7 +25,6 @@
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/StringMap.h"
#include <algorithm>
#include <map>
using namespace llvm;
@@ -84,32 +82,28 @@ PrintAfterAll("print-after-all",
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
+static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
PassOptionList &PassesToPrint) {
- if (const llvm::PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf)
- if (PassInf->getPassArgument() == PI->getPassArgument()) {
- return true;
- }
- }
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
}
return false;
}
-
/// This is a utility to check whether a pass should have IR dumped
/// before it.
-static bool ShouldPrintBeforePass(const void *PassID) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
+static bool ShouldPrintBeforePass(const PassInfo *PI) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
}
/// This is a utility to check whether a pass should have IR dumped
/// after it.
-static bool ShouldPrintAfterPass(const void *PassID) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
+static bool ShouldPrintAfterPass(const PassInfo *PI) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
}
} // End of llvm namespace
@@ -223,6 +217,7 @@ namespace llvm {
class FunctionPassManagerImpl : public Pass,
public PMDataManager,
public PMTopLevelManager {
+ virtual void anchor();
private:
bool wasRun;
public:
@@ -263,27 +258,15 @@ public:
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_FunctionPassManager;
+ }
/// Pass Manager itself does not invalidate any analysis info.
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
}
- void addTopLevelPass(Pass *P) {
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
- // P is a immutable pass and it will be managed by this
- // top level manager. Set up analysis resolver to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
- initializeAnalysisImpl(P);
- addImmutablePass(IP);
- recordAvailableAnalysis(IP);
- } else {
- P->assignPassManager(activeStack, PMT_FunctionPassManager);
- }
-
- }
-
FPPassManager *getContainedManager(unsigned N) {
assert(N < PassManagers.size() && "Pass number out of range!");
FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
@@ -291,6 +274,8 @@ public:
}
};
+void FunctionPassManagerImpl::anchor() {}
+
char FunctionPassManagerImpl::ID = 0;
//===----------------------------------------------------------------------===//
@@ -384,6 +369,7 @@ char MPPassManager::ID = 0;
class PassManagerImpl : public Pass,
public PMDataManager,
public PMTopLevelManager {
+ virtual void anchor();
public:
static char ID;
@@ -413,22 +399,11 @@ public:
Info.setPreservesAll();
}
- void addTopLevelPass(Pass *P) {
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
- // P is a immutable pass and it will be managed by this
- // top level manager. Set up analysis resolver to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
- initializeAnalysisImpl(P);
- addImmutablePass(IP);
- recordAvailableAnalysis(IP);
- } else {
- P->assignPassManager(activeStack, PMT_ModulePassManager);
- }
- }
-
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_ModulePassManager;
+ }
MPPassManager *getContainedManager(unsigned N) {
assert(N < PassManagers.size() && "Pass number out of range!");
@@ -437,26 +412,14 @@ public:
}
};
+void PassManagerImpl::anchor() {}
+
char PassManagerImpl::ID = 0;
} // End of llvm namespace
namespace {
//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-static DebugInfoProbeInfo *TheDebugProbe;
-static void createDebugInfoProbe() {
- if (TheDebugProbe) return;
-
- // Constructed the first time this is called. This guarantees that the
- // object will be constructed, if -enable-debug-info-probe is set,
- // before static globals, thus it will be destroyed before them.
- static ManagedStatic<DebugInfoProbeInfo> DIP;
- TheDebugProbe = &*DIP;
-}
-
-//===----------------------------------------------------------------------===//
/// TimingInfo Class - This class is used to calculate information about the
/// amount of time each pass takes to execute. This only happens when
/// -time-passes is enabled on the command line.
@@ -654,7 +617,32 @@ void PMTopLevelManager::schedulePass(Pass *P) {
}
// Now all required passes are available.
- addTopLevelPass(P);
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ PMDataManager *DM = getAsPMDataManager();
+ AnalysisResolver *AR = new AnalysisResolver(*DM);
+ P->setResolver(AR);
+ DM->initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ DM->recordAvailableAnalysis(IP);
+ return;
+ }
+
+ if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+
+ // Add the requested pass to the best available pass manager.
+ P->assignPassManager(activeStack, getTopLevelPassManagerType());
+
+ if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
}
/// Find the pass that implements Analysis AID. Search immutable
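
With addTopLevelPass folded away, schedulePass both handles immutable passes itself and brackets a scheduled pass with the IR-dump printer passes, so FunctionPassManager::add and PassManager::add (further below) shrink to plain forwarding calls. A rough sketch of the bracketing step, with placeholder names in place of the real PMStack plumbing:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    typedef std::function<void()> Stage;

    // Hypothetical scheduler: printer stages are queued around the real stage.
    static void schedule(std::vector<Stage> &Pipeline, const std::string &Name,
                         Stage S, bool PrintBefore, bool PrintAfter) {
      if (PrintBefore)
        Pipeline.push_back([Name] { std::cout << "*** IR Dump Before " << Name << " ***\n"; });
      Pipeline.push_back(S);
      if (PrintAfter)
        Pipeline.push_back([Name] { std::cout << "*** IR Dump After " << Name << " ***\n"; });
    }
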
@@ -1224,8 +1212,7 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
}
Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
- assert(0 && "Unable to find on the fly pass");
- return NULL;
+ llvm_unreachable("Unable to find on the fly pass");
}
// Destructor
@@ -1351,31 +1338,13 @@ FunctionPassManager::~FunctionPassManager() {
delete FPM;
}
-/// addImpl - Add a pass to the queue of passes to run, without
-/// checking whether to add a printer pass.
-void FunctionPassManager::addImpl(Pass *P) {
- FPM->add(P);
-}
-
/// add - Add a pass to the queue of passes to run. This passes
/// ownership of the Pass to the PassManager. When the
/// PassManager_X is destroyed, the pass will be destroyed as well, so
/// there is no need to delete the pass. (TODO delete passes.)
/// This implies that all passes MUST be allocated with 'new'.
void FunctionPassManager::add(Pass *P) {
- // If this is a not a function pass, don't add a printer for it.
- const void *PassID = P->getPassID();
- if (P->getPassKind() == PT_Function)
- if (ShouldPrintBeforePass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
-
- addImpl(P);
-
- if (P->getPassKind() == PT_Function)
- if (ShouldPrintAfterPass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ FPM->add(P);
}
/// run - Execute all of the passes scheduled for execution. Keep
@@ -1455,7 +1424,6 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
bool FunctionPassManagerImpl::run(Function &F) {
bool Changed = false;
TimingInfo::createTheTimeInfo();
- createDebugInfoProbe();
initializeAllAnalysisInfo();
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
@@ -1474,7 +1442,7 @@ bool FunctionPassManagerImpl::run(Function &F) {
char FPPassManager::ID = 0;
/// Print passes managed by this manager
void FPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ dbgs().indent(Offset*2) << "FunctionPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
FP->dumpPassStructure(Offset + 1);
@@ -1503,16 +1471,13 @@ bool FPPassManager::runOnFunction(Function &F) {
dumpRequiredSet(FP);
initializeAnalysisImpl(FP);
- if (TheDebugProbe)
- TheDebugProbe->initialize(FP, F);
+
{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
LocalChanged |= FP->runOnFunction(F);
}
- if (TheDebugProbe)
- TheDebugProbe->finalize(FP, F);
Changed |= LocalChanged;
if (LocalChanged)
@@ -1662,7 +1627,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
bool PassManagerImpl::run(Module &M) {
bool Changed = false;
TimingInfo::createTheTimeInfo();
- createDebugInfoProbe();
dumpArguments();
dumpPasses();
@@ -1687,27 +1651,12 @@ PassManager::~PassManager() {
delete PM;
}
-/// addImpl - Add a pass to the queue of passes to run, without
-/// checking whether to add a printer pass.
-void PassManager::addImpl(Pass *P) {
- PM->add(P);
-}
-
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void PassManager::add(Pass *P) {
- const void* PassID = P->getPassID();
- if (ShouldPrintBeforePass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
-
- addImpl(P);
-
- if (ShouldPrintAfterPass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ PM->add(P);
}
/// run - Execute all of the passes scheduled for execution. Keep track of
@@ -1817,7 +1766,7 @@ void ModulePass::assignPassManager(PMStack &PMS,
void FunctionPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
- // Find Module Pass Manager
+ // Find Function Pass Manager
while (!PMS.empty()) {
if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
PMS.pop();
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 10184bc6f0e3..c6f35580e158 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
switch (IDNumber) {
case VoidTyID : return getVoidTy(C);
+ case HalfTyID : return getHalfTy(C);
case FloatTyID : return getFloatTy(C);
case DoubleTyID : return getDoubleTy(C);
case X86_FP80TyID : return getX86_FP80Ty(C);
@@ -57,7 +58,7 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
bool Type::isIntOrIntVectorTy() const {
if (isIntegerTy())
return true;
- if (ID != Type::VectorTyID) return false;
+ if (getTypeID() != Type::VectorTyID) return false;
return cast<VectorType>(this)->getElementType()->isIntegerTy();
}
@@ -65,11 +66,12 @@ bool Type::isIntOrIntVectorTy() const {
/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
///
bool Type::isFPOrFPVectorTy() const {
- if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
- ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
- ID == Type::PPC_FP128TyID)
+ if (getTypeID() == Type::HalfTyID || getTypeID() == Type::FloatTyID ||
+ getTypeID() == Type::DoubleTyID ||
+ getTypeID() == Type::FP128TyID || getTypeID() == Type::X86_FP80TyID ||
+ getTypeID() == Type::PPC_FP128TyID)
return true;
- if (ID != Type::VectorTyID) return false;
+ if (getTypeID() != Type::VectorTyID) return false;
return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
}
@@ -131,6 +133,7 @@ bool Type::isEmptyTy() const {
unsigned Type::getPrimitiveSizeInBits() const {
switch (getTypeID()) {
+ case Type::HalfTyID: return 16;
case Type::FloatTyID: return 32;
case Type::DoubleTyID: return 64;
case Type::X86_FP80TyID: return 80;
@@ -157,11 +160,12 @@ int Type::getFPMantissaWidth() const {
if (const VectorType *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType()->getFPMantissaWidth();
assert(isFloatingPointTy() && "Not a floating point type!");
- if (ID == FloatTyID) return 24;
- if (ID == DoubleTyID) return 53;
- if (ID == X86_FP80TyID) return 64;
- if (ID == FP128TyID) return 113;
- assert(ID == PPC_FP128TyID && "unknown fp type");
+ if (getTypeID() == HalfTyID) return 11;
+ if (getTypeID() == FloatTyID) return 24;
+ if (getTypeID() == DoubleTyID) return 53;
+ if (getTypeID() == X86_FP80TyID) return 64;
+ if (getTypeID() == FP128TyID) return 113;
+ assert(getTypeID() == PPC_FP128TyID && "unknown fp type");
return -1;
}
@@ -181,24 +185,69 @@ bool Type::isSizedDerivedType() const {
if (!this->isStructTy())
return false;
- // Opaque structs have no size.
- if (cast<StructType>(this)->isOpaque())
- return false;
-
- // Okay, our struct is sized if all of the elements are.
- for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
- if (!(*I)->isSized())
- return false;
+ return cast<StructType>(this)->isSized();
+}
- return true;
+//===----------------------------------------------------------------------===//
+// Subclass Helper Methods
+//===----------------------------------------------------------------------===//
+
+unsigned Type::getIntegerBitWidth() const {
+ return cast<IntegerType>(this)->getBitWidth();
+}
+
+bool Type::isFunctionVarArg() const {
+ return cast<FunctionType>(this)->isVarArg();
+}
+
+Type *Type::getFunctionParamType(unsigned i) const {
+ return cast<FunctionType>(this)->getParamType(i);
+}
+
+unsigned Type::getFunctionNumParams() const {
+ return cast<FunctionType>(this)->getNumParams();
+}
+
+StringRef Type::getStructName() const {
+ return cast<StructType>(this)->getName();
+}
+
+unsigned Type::getStructNumElements() const {
+ return cast<StructType>(this)->getNumElements();
+}
+
+Type *Type::getStructElementType(unsigned N) const {
+ return cast<StructType>(this)->getElementType(N);
+}
+
+
+
+Type *Type::getSequentialElementType() const {
+ return cast<SequentialType>(this)->getElementType();
+}
+
+uint64_t Type::getArrayNumElements() const {
+ return cast<ArrayType>(this)->getNumElements();
+}
+
+unsigned Type::getVectorNumElements() const {
+ return cast<VectorType>(this)->getNumElements();
}
+unsigned Type::getPointerAddressSpace() const {
+ return cast<PointerType>(this)->getAddressSpace();
+}
+
+
+
+
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
@@ -217,6 +266,10 @@ IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
}
+PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
+ return getHalfTy(C)->getPointerTo(AS);
+}
+
PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
return getFloatTy(C)->getPointerTo(AS);
}
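
These Type.cpp hunks thread the new IEEE-754 half (16-bit) floating-point type through VMCore: getPrimitiveType, getPrimitiveSizeInBits (16), getFPMantissaWidth (11, i.e. 10 stored fraction bits plus the implicit leading one), and the getHalfTy/getHalfPtrTy accessors above. A minimal query sketch, assuming an LLVM tree of roughly this vintage:

    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    #include <cassert>

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Type *Half = llvm::Type::getHalfTy(Ctx);
      assert(Half->getPrimitiveSizeInBits() == 16);
      assert(Half->getFPMantissaWidth() == 11); // 10 fraction bits + implicit bit
      return 0;
    }
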
@@ -328,23 +381,20 @@ FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
// FunctionType::get - The factory function for the FunctionType class.
FunctionType *FunctionType::get(Type *ReturnType,
ArrayRef<Type*> Params, bool isVarArg) {
- // TODO: This is brutally slow.
- std::vector<Type*> Key;
- Key.reserve(Params.size()+2);
- Key.push_back(const_cast<Type*>(ReturnType));
- for (unsigned i = 0, e = Params.size(); i != e; ++i)
- Key.push_back(const_cast<Type*>(Params[i]));
- if (isVarArg)
- Key.push_back(0);
-
LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- FunctionType *&FT = pImpl->FunctionTypes[Key];
-
- if (FT == 0) {
+ FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
+ LLVMContextImpl::FunctionTypeMap::iterator I =
+ pImpl->FunctionTypes.find_as(Key);
+ FunctionType *FT;
+
+ if (I == pImpl->FunctionTypes.end()) {
FT = (FunctionType*) pImpl->TypeAllocator.
- Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+ Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
AlignOf<FunctionType>::Alignment);
new (FT) FunctionType(ReturnType, Params, isVarArg);
+ pImpl->FunctionTypes[FT] = true;
+ } else {
+ FT = I->first;
}
return FT;
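
FunctionType::get (and StructType::get just below) used to build a temporary std::vector<Type*> key on every call; they now probe the context's map directly through find_as with a lightweight key and construct the type only on a miss. The general get-or-create uniquing shape, sketched with a standard map (which, unlike LLVM's find_as path, still copies the key on insertion):

    #include <map>
    #include <vector>

    struct Sig { std::vector<int> Params; };          // illustrative uniqued object

    typedef std::map<std::vector<int>, Sig *> SigTable;

    static Sig *getOrCreate(SigTable &Table, const std::vector<int> &Params) {
      SigTable::iterator I = Table.find(Params);      // probe with the caller's data
      if (I != Table.end())
        return I->second;                             // already uniqued: share it
      Sig *S = new Sig();                             // miss: create exactly once
      S->Params = Params;
      Table[Params] = S;
      return S;
    }
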
@@ -377,23 +427,22 @@ bool FunctionType::isValidArgumentType(Type *ArgTy) {
StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
bool isPacked) {
- // FIXME: std::vector is horribly inefficient for this probe.
- std::vector<Type*> Key;
- for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
- assert(isValidElementType(ETypes[i]) &&
- "Invalid type for structure element!");
- Key.push_back(ETypes[i]);
+ LLVMContextImpl *pImpl = Context.pImpl;
+ AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
+ LLVMContextImpl::StructTypeMap::iterator I =
+ pImpl->AnonStructTypes.find_as(Key);
+ StructType *ST;
+
+ if (I == pImpl->AnonStructTypes.end()) {
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
+ ST->setBody(ETypes, isPacked);
+ Context.pImpl->AnonStructTypes[ST] = true;
+ } else {
+ ST = I->first;
}
- if (isPacked)
- Key.push_back(0);
-
- StructType *&ST = Context.pImpl->AnonStructTypes[Key];
- if (ST) return ST;
-
- // Value not found. Create a new type!
- ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
- ST->setBody(ETypes, isPacked);
+
return ST;
}
@@ -403,13 +452,13 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
setSubclassData(getSubclassData() | SCDB_HasBody);
if (isPacked)
setSubclassData(getSubclassData() | SCDB_Packed);
-
- Type **Elts = getContext().pImpl->
- TypeAllocator.Allocate<Type*>(Elements.size());
- memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
+
+ unsigned NumElements = Elements.size();
+ Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
+ memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
ContainedTys = Elts;
- NumContainedTys = Elements.size();
+ NumContainedTys = NumElements;
}
void StructType::setName(StringRef Name) {
@@ -434,9 +483,10 @@ void StructType::setName(StringRef Name) {
SmallString<64> TempStr(Name);
TempStr.push_back('.');
raw_svector_ostream TmpStream(TempStr);
+ unsigned NameSize = Name.size();
do {
- TempStr.resize(Name.size()+1);
+ TempStr.resize(NameSize + 1);
TmpStream.resync();
TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
@@ -520,6 +570,26 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
return llvm::StructType::create(Ctx, StructFields, Name);
}
+bool StructType::isSized() const {
+ if ((getSubclassData() & SCDB_IsSized) != 0)
+ return true;
+ if (isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are, but if one of the
+ // elements is opaque, the struct isn't sized *yet*, but may become sized in
+ // the future, so just bail out without caching.
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+ // we find a sized type, as types can only move from opaque to sized, not the
+ // other way.
+ const_cast<StructType*>(this)->setSubclassData(
+ getSubclassData() | SCDB_IsSized);
+ return true;
+}
StringRef StructType::getName() const {
assert(!isLiteral() && "Literal structs never have names");
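
StructType::isSized memoizes a positive answer in a subclass-data bit: a struct can only go from opaque to sized, never back, so the const_cast-and-set is safe and later queries skip the element walk. A negative answer is deliberately not cached, since an opaque element may be given a body later. The same one-way memoization in generic form:

    #include <vector>

    struct Node {
      std::vector<const Node *> Elems;
      mutable bool KnownSized;                 // one-way cache: false -> true only
      Node() : KnownSized(false) {}

      bool isSized() const {
        if (KnownSized)
          return true;                         // answered from the cache
        for (size_t i = 0, e = Elems.size(); i != e; ++i)
          if (!Elems[i]->isSized())
            return false;                      // may become sized later: don't cache
        KnownSized = true;                     // property is monotone, safe to keep
        return true;
      }
    };
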
@@ -664,6 +734,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(Type *ElemTy) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
+ ElemTy = PTy->getElementType();
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
@@ -689,7 +761,12 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType::PointerType(Type *E, unsigned AddrSpace)
: SequentialType(PointerTyID, E) {
+#ifndef NDEBUG
+ const unsigned oldNCT = NumContainedTys;
+#endif
setSubclassData(AddrSpace);
+ // Check for miscompile. PR11652.
+ assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
}
PointerType *Type::getPointerTo(unsigned addrs) {
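
The PointerType constructor grows a debug-only guard for PR11652: it snapshots NumContainedTys, performs the bitfield write for the address space, and asserts that the neighbouring field was not clobbered, catching miscompiles where a bitfield store spills into adjacent storage. The same pattern in a self-contained form:

    #include <cassert>

    struct Packed {
      unsigned Count;                          // field that must stay untouched
      unsigned Flags : 8;                      // adjacent bitfield being written
      Packed() : Count(0), Flags(0) {}

      void setFlags(unsigned F) {
    #ifndef NDEBUG
        const unsigned OldCount = Count;       // snapshot before the bitfield store
    #endif
        Flags = F & 0xff;
        // Compiles away (together with OldCount) in NDEBUG builds.
        assert(OldCount == Count && "bitfield written out of bounds?");
      }
    };
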
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index 359a1517ab79..0128adc3f776 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Value.h"
+#include <new>
namespace llvm {
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index f01fa349adfd..5f35ce4b9a4f 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -17,6 +17,8 @@ namespace llvm {
// User Class
//===----------------------------------------------------------------------===//
+void User::anchor() {}
+
// replaceUsesOfWith - Replaces all references to the "From" definition with
// references to the "To" definition.
//
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 2fa5f08a3e7f..4006b2c55418 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -66,7 +66,7 @@ Value::~Value() {
// a <badref>
//
if (!use_empty()) {
- dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
dbgs() << "Use still stuck around after Def is destroyed:"
<< **I << "\n";
@@ -76,7 +76,7 @@ Value::~Value() {
// If this value is named, destroy the name. This should not be in a symtab
// at this point.
- if (Name)
+ if (Name && SubclassID != MDStringVal)
Name->Destroy();
// There should be no uses of this object anymore, remove it.
@@ -108,6 +108,19 @@ bool Value::hasNUsesOrMore(unsigned N) const {
/// isUsedInBasicBlock - Return true if this value is used in the specified
/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator
+ break;
+ }
+
+ if (MaxBlockSize != 0) // We scanned the entire block and found no use.
+ return false;
+
for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const Instruction *User = dyn_cast<Instruction>(*I);
if (User && User->getParent() == BB)
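
isUsedInBasicBlock now probes the first few instructions of the block for an operand match before falling back to the value's (potentially very long) use list; when the whole block fits inside the probe window, the scan alone is conclusive. The bounded-probe-then-fallback shape, sketched generically (Prefix is assumed to be the leading slice of Full, so scanning all of it is conclusive):

    #include <vector>

    static bool contains(const std::vector<int> &Prefix,
                         const std::vector<int> &Full, int X) {
      const size_t MaxProbe = 4;
      size_t i = 0, e = Prefix.size();
      for (; i != e && i != MaxProbe; ++i)     // cheap bounded scan first
        if (Prefix[i] == X)
          return true;
      if (i == e)
        return false;                          // scanned everything: conclusive
      for (size_t j = 0, je = Full.size(); j != je; ++j)
        if (Full[j] == X)                      // otherwise pay for the full walk
          return true;
      return false;
    }
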
@@ -156,11 +169,10 @@ StringRef Value::getName() const {
return Name->getKey();
}
-std::string Value::getNameStr() const {
- return getName().str();
-}
-
void Value::setName(const Twine &NewName) {
+ assert(SubclassID != MDStringVal &&
+ "Cannot set the name of MDString with this method!");
+
// Fast path for common IRBuilder case of setName("") when there is no name.
if (NewName.isTriviallyEmpty() && !hasName())
return;
@@ -219,6 +231,8 @@ void Value::setName(const Twine &NewName) {
/// takeName - transfer the name from V to this value, setting V's name to
/// empty. It is an error to call V->takeName(V).
void Value::takeName(Value *V) {
+ assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
+
ValueSymbolTable *ST = 0;
// If this value has a name, drop it.
if (hasName()) {
@@ -308,20 +322,40 @@ void Value::replaceAllUsesWith(Value *New) {
BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
-Value *Value::stripPointerCasts() {
- if (!getType()->isPointerTy())
- return this;
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+ PSK_ZeroIndices,
+ PSK_InBoundsConstantIndices,
+ PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
- Value *V = this;
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllZeroIndices())
- return V;
+ switch (StripKind) {
+ case PSK_ZeroIndices:
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ break;
+ case PSK_InBoundsConstantIndices:
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ // fallthrough
+ case PSK_InBounds:
+ if (!GEP->isInBounds())
+ return V;
+ break;
+ }
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
@@ -337,10 +371,24 @@ Value *Value::stripPointerCasts() {
return V;
}
+} // namespace
+
+Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
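
stripPointerCasts, stripInBoundsConstantOffsets and stripInBoundsOffsets above are now thin wrappers around one worker parameterized by a compile-time PointerStripKind, so the switch over the strip policy folds away in each instantiation. The shape of that refactor, with placeholder names and a toy policy:

    #include <string>

    enum StripKind { StripSpaces, StripDigits, StripBoth };

    // One worker, specialized at compile time by the strip policy; each wrapper
    // below instantiates it once and the dead switch arms are folded away.
    template <StripKind K>
    static std::string stripTrailing(std::string S) {
      while (!S.empty()) {
        char C = S[S.size() - 1];
        bool Strip = false;
        switch (K) {
        case StripSpaces: Strip = (C == ' ');                           break;
        case StripDigits: Strip = (C >= '0' && C <= '9');               break;
        case StripBoth:   Strip = (C == ' ') || (C >= '0' && C <= '9'); break;
        }
        if (!Strip)
          break;
        S.erase(S.size() - 1);
      }
      return S;
    }

    std::string stripTrailingSpaces(const std::string &S) { return stripTrailing<StripSpaces>(S); }
    std::string stripTrailingDigits(const std::string &S) { return stripTrailing<StripDigits>(S); }
    std::string stripTrailingBoth(const std::string &S)   { return stripTrailing<StripBoth>(S); }
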
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
+static bool isDereferenceablePointer(const Value *V,
+ SmallPtrSet<const Value *, 32> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
// It's also not always safe to follow a bitcast, for example:
@@ -349,20 +397,22 @@ bool Value::isDereferenceablePointer() const {
// be handled using TargetData to check sizes and alignments though.
// These are obviously ok.
- if (isa<AllocaInst>(this)) return true;
+ if (isa<AllocaInst>(V)) return true;
// Global variables which can't collapse to null are ok.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
// byval arguments are ok.
- if (const Argument *A = dyn_cast<Argument>(this))
+ if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValAttr();
-
+
// For GEPs, determine if the indexing lands within the allocated object.
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Conservatively require that the base pointer be fully dereferenceable.
- if (!GEP->getOperand(0)->isDereferenceablePointer())
+ if (!Visited.insert(GEP->getOperand(0)))
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -396,6 +446,13 @@ bool Value::isDereferenceablePointer() const {
return false;
}
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(this, Visited);
+}
+
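
isDereferenceablePointer now recurses through GEP base pointers with an explicit visited set, so a cyclic chain of instructions in unreachable code cannot drive it into infinite recursion; the public wrapper just above seeds the set and delegates. A generic sketch of the same guard:

    #include <set>
    #include <vector>

    struct N { std::vector<const N *> Preds; bool Good; };

    // Recursive property check that tolerates cycles in the graph: a node that
    // has already been visited is treated conservatively as a failure.
    static bool allPredsGood(const N *V, std::set<const N *> &Visited) {
      if (!V->Good)
        return false;
      for (size_t i = 0, e = V->Preds.size(); i != e; ++i) {
        if (!Visited.insert(V->Preds[i]).second)
          return false;                        // seen before: bail out, no recursion
        if (!allPredsGood(V->Preds[i], Visited))
          return false;
      }
      return true;
    }

    bool allPredsGood(const N *V) {
      std::set<const N *> Visited;             // public entry point seeds the set
      return allPredsGood(V, Visited);
    }
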
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
/// return the value in the PHI node corresponding to PredBB. If not, return
/// ourself. This is useful if you want to know the value something has in a
@@ -425,7 +482,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
setPrevPtr(List);
if (Next) {
Next->setPrevPtr(&Next);
- assert(VP == Next->VP && "Added to wrong list?");
+ assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?");
}
}
@@ -441,14 +498,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
/// AddToUseList - Add this ValueHandle to the use list for VP.
void ValueHandleBase::AddToUseList() {
- assert(VP && "Null pointer doesn't have a use list!");
+ assert(VP.getPointer() && "Null pointer doesn't have a use list!");
- LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
- if (VP->HasValueHandle) {
+ if (VP.getPointer()->HasValueHandle) {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
- ValueHandleBase *&Entry = pImpl->ValueHandles[VP];
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
assert(Entry != 0 && "Value doesn't have any handles?");
AddToExistingUseList(&Entry);
return;
@@ -462,10 +519,10 @@ void ValueHandleBase::AddToUseList() {
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
- ValueHandleBase *&Entry = Handles[VP];
+ ValueHandleBase *&Entry = Handles[VP.getPointer()];
assert(Entry == 0 && "Value really did already have handles?");
AddToExistingUseList(&Entry);
- VP->HasValueHandle = true;
+ VP.getPointer()->HasValueHandle = true;
// If reallocation didn't happen or if this was the first insertion, don't
// walk the table.
@@ -477,14 +534,16 @@ void ValueHandleBase::AddToUseList() {
// Okay, reallocation did happen. Fix the Prev Pointers.
for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
E = Handles.end(); I != E; ++I) {
- assert(I->second && I->first == I->second->VP && "List invariant broken!");
+ assert(I->second && I->first == I->second->VP.getPointer() &&
+ "List invariant broken!");
I->second->setPrevPtr(&I->second);
}
}
/// RemoveFromUseList - Remove this ValueHandle from its current use list.
void ValueHandleBase::RemoveFromUseList() {
- assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!");
+ assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
+ "Pointer doesn't have a use list!");
// Unlink this from its use list.
ValueHandleBase **PrevPtr = getPrevPtr();
@@ -500,11 +559,11 @@ void ValueHandleBase::RemoveFromUseList() {
// If the Next pointer was null, then it is possible that this was the last
// ValueHandle watching VP. If so, delete its entry from the ValueHandles
// map.
- LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- Handles.erase(VP);
- VP->HasValueHandle = false;
+ Handles.erase(VP.getPointer());
+ VP.getPointer()->HasValueHandle = false;
}
}
@@ -554,7 +613,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// All callbacks, weak references, and assertingVHs should be dropped by now.
if (V->HasValueHandle) {
#ifndef NDEBUG // Only in +Asserts mode...
- dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
<< "\n";
if (pImpl->ValueHandles[V]->getKind() == Assert)
llvm_unreachable("An asserting value handle still pointed to this"
@@ -617,8 +676,8 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
case Tracking:
case Weak:
dbgs() << "After RAUW from " << *Old->getType() << " %"
- << Old->getNameStr() << " to " << *New->getType() << " %"
- << New->getNameStr() << "\n";
+ << Old->getName() << " to " << *New->getType() << " %"
+ << New->getName() << "\n";
llvm_unreachable("A tracking or weak value handle still pointed to the"
" old value!\n");
default:
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index e13bd7df7375..9a8e1859e2d4 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -87,8 +87,7 @@ unsigned EVT::getExtendedSizeInBits() const {
return ITy->getBitWidth();
if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
return VTy->getBitWidth();
- assert(false && "Unrecognized extended type!");
- return 0; // Suppress warnings.
+ llvm_unreachable("Unrecognized extended type!");
}
/// getEVTString - This function returns value type as a string, e.g. "i32".
@@ -101,13 +100,13 @@ std::string EVT::getEVTString() const {
if (isInteger())
return "i" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- return "?";
case MVT::i1: return "i1";
case MVT::i8: return "i8";
case MVT::i16: return "i16";
case MVT::i32: return "i32";
case MVT::i64: return "i64";
case MVT::i128: return "i128";
+ case MVT::f16: return "f16";
case MVT::f32: return "f32";
case MVT::f64: return "f64";
case MVT::f80: return "f80";
@@ -134,12 +133,13 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v2f32: return "v2f32";
+ case MVT::v2f16: return "v2f16";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::Metadata:return "Metadata";
- case MVT::untyped: return "untyped";
+ case MVT::Untyped: return "Untyped";
}
}
@@ -158,6 +158,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::i32: return Type::getInt32Ty(Context);
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
@@ -180,6 +181,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
@@ -197,11 +199,11 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
default:
if (HandleUnknown) return MVT(MVT::Other);
llvm_unreachable("Unknown type!");
- return MVT::isVoid;
case Type::VoidTyID:
return MVT::isVoid;
case Type::IntegerTyID:
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
case Type::FloatTyID: return MVT(MVT::f32);
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 9564b7d71f6a..96492e44d56f 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1,4 +1,4 @@
-//===-- Verifier.cpp - Implement the Module Verifier -------------*- C++ -*-==//
+//===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -51,6 +51,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -117,7 +118,6 @@ namespace {
struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
static char ID; // Pass ID, replacement for typeid
bool Broken; // Is this module found to be broken?
- bool RealPass; // Are we not being run by a PassManager?
VerifierFailureAction action;
// What to do if verification fails.
Module *Mod; // Module we are verifying right now
@@ -143,13 +143,13 @@ namespace {
const Value *PersonalityFn;
Verifier()
- : FunctionPass(ID), Broken(false), RealPass(true),
+ : FunctionPass(ID), Broken(false),
action(AbortProcessAction), Mod(0), Context(0), DT(0),
MessagesStr(Messages), PersonalityFn(0) {
initializeVerifierPass(*PassRegistry::getPassRegistry());
}
explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(ID), Broken(false), RealPass(true), action(ctn), Mod(0),
+ : FunctionPass(ID), Broken(false), action(ctn), Mod(0),
Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
initializeVerifierPass(*PassRegistry::getPassRegistry());
}
@@ -158,17 +158,14 @@ namespace {
Mod = &M;
Context = &M.getContext();
- // If this is a real pass, in a pass manager, we must abort before
- // returning back to the pass manager, or else the pass manager may try to
- // run other passes on the broken module.
- if (RealPass)
- return abortIfBroken();
- return false;
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
}
bool runOnFunction(Function &F) {
// Get dominator information if we are being run by PassManager
- if (RealPass) DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTree>();
Mod = F.getParent();
if (!Context) Context = &F.getContext();
@@ -177,13 +174,9 @@ namespace {
InstsInThisBlock.clear();
PersonalityFn = 0;
- // If this is a real pass, in a pass manager, we must abort before
- // returning back to the pass manager, or else the pass manager may try to
- // run other passes on the broken module.
- if (RealPass)
- return abortIfBroken();
-
- return false;
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
}
bool doFinalization(Module &M) {
@@ -214,8 +207,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredID(PreVerifyID);
- if (RealPass)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTree>();
}
/// abortIfBroken - If the module is broken and we are supposed to abort on
@@ -225,7 +217,6 @@ namespace {
if (!Broken) return false;
MessagesStr << "Broken module found, ";
switch (action) {
- default: llvm_unreachable("Unknown action");
case AbortProcessAction:
MessagesStr << "compilation aborted!\n";
dbgs() << MessagesStr.str();
@@ -239,6 +230,7 @@ namespace {
MessagesStr << "compilation terminated.\n";
return true;
}
+ llvm_unreachable("Invalid action");
}
@@ -279,6 +271,7 @@ namespace {
void visitGetElementPtrInst(GetElementPtrInst &GEP);
void visitLoadInst(LoadInst &LI);
void visitStoreInst(StoreInst &SI);
+ void verifyDominatesUse(Instruction &I, unsigned i);
void visitInstruction(Instruction &I);
void visitTerminatorInst(TerminatorInst &I);
void visitBranchInst(BranchInst &BI);
@@ -547,7 +540,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
- Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
Attribute::getAsString(MutI) + " are incompatible!", V);
}
@@ -607,7 +600,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
- Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
Attribute::getAsString(MutI) + " are incompatible!", V);
}
}
@@ -812,11 +805,11 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
- for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
- Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ Assert1(i.getCaseValue()->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(SI.getCaseValue(i)),
- "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ Assert2(Constants.insert(i.getCaseValue()),
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
@@ -1035,8 +1028,19 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
- Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1046,9 +1050,18 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
- Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
-
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1245,7 +1258,7 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
@@ -1295,17 +1308,41 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
- ->getElementType()->isSized(),
+ Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
+
+ Assert1(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
"GEP into unsized type!", &GEP);
-
+
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isPointerTy()) {
+ // Validate GEPs with scalar indices.
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ } else {
+ // Validate GEPs with a vector index.
+ Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
+ Value *Index = Idxs[0];
+ Type *IndexTy = Index->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ Assert1(GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
+ Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
+ Assert1(ElemPtr->isPointerTy(),
+ "Vector GEP pointer operand is not a pointer!", &GEP);
+ unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
+ unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
+ "Vector GEP type does not match pointer type!", &GEP);
+ }
visitInstruction(GEP);
}
@@ -1324,6 +1361,25 @@ void Verifier::visitLoadInst(LoadInst &LI) {
Assert1(LI.getSynchScope() == CrossThread,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
}
+
+ if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
+ unsigned NumOperands = Range->getNumOperands();
+ Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ unsigned NumRanges = NumOperands / 2;
+ Assert1(NumRanges >= 1, "It should have at least one range!", Range);
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
+ Assert1(Low, "The lower limit must be an integer!", Low);
+ ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
+ Assert1(High, "The upper limit must be an integer!", High);
+ Assert1(High->getType() == Low->getType() &&
+ High->getType() == ElTy, "Range types must match load type!",
+ &LI);
+ Assert1(High->getValue() != Low->getValue(), "Range must not be empty!",
+ Range);
+ }
+ }
+
visitInstruction(LI);
}
@@ -1468,6 +1524,58 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
visitInstruction(LPI);
}
+void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
+ Instruction *Op = cast<Instruction>(I.getOperand(i));
+ BasicBlock *BB = I.getParent();
+ BasicBlock *OpBlock = Op->getParent();
+ PHINode *PN = dyn_cast<PHINode>(&I);
+
+ // DT can handle non phi instructions for us.
+ if (!PN) {
+ // Definition must dominate use unless use is unreachable!
+ Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB) ||
+ DT->dominates(Op, &I),
+ "Instruction does not dominate all uses!", Op, &I);
+ return;
+ }
+
+ // Check that a definition dominates all of its uses.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+
+
+ // PHI nodes differ from other nodes because they actually "use" the
+ // value in the predecessor basic blocks they correspond to.
+ BasicBlock *UseBlock = BB;
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ UseBlock = PN->getIncomingBlock(j);
+ Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
+ Op, &I);
+
+ if (UseBlock == OpBlock) {
+ // Special case of a phi node in the normal destination or the unwind
+ // destination.
+ Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
+ "Invoke result not available in the unwind destination!",
+ Op, &I);
+ } else {
+ Assert2(DT->dominates(II, UseBlock) ||
+ !DT->isReachableFromEntry(UseBlock),
+ "Invoke result does not dominate all uses!", Op, &I);
+ }
+ }
+
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
+ Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB)),
+ "Instruction does not dominate all uses!", Op, &I);
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -1536,84 +1644,30 @@ void Verifier::visitInstruction(Instruction &I) {
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
Assert1(GV->getParent() == Mod, "Referencing global in another module!",
&I);
- } else if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
- BasicBlock *OpBlock = Op->getParent();
-
- // Check that a definition dominates all of its uses.
- if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
- // Invoke results are only usable in the normal destination, not in the
- // exceptional destination.
- BasicBlock *NormalDest = II->getNormalDest();
-
- Assert2(NormalDest != II->getUnwindDest(),
- "No uses of invoke possible due to dominance structure!",
- Op, &I);
-
- // PHI nodes differ from other nodes because they actually "use" the
- // value in the predecessor basic blocks they correspond to.
- BasicBlock *UseBlock = BB;
- if (PHINode *PN = dyn_cast<PHINode>(&I)) {
- unsigned j = PHINode::getIncomingValueNumForOperand(i);
- UseBlock = PN->getIncomingBlock(j);
- }
- Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
- Op, &I);
-
- if (isa<PHINode>(I) && UseBlock == OpBlock) {
- // Special case of a phi node in the normal destination or the unwind
- // destination.
- Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
- "Invoke result not available in the unwind destination!",
- Op, &I);
- } else {
- Assert2(DT->dominates(NormalDest, UseBlock) ||
- !DT->isReachableFromEntry(UseBlock),
- "Invoke result does not dominate all uses!", Op, &I);
-
- // If the normal successor of an invoke instruction has multiple
- // predecessors, then the normal edge from the invoke is critical,
- // so the invoke value can only be live if the destination block
- // dominates all of it's predecessors (other than the invoke).
- if (!NormalDest->getSinglePredecessor() &&
- DT->isReachableFromEntry(UseBlock))
- // If it is used by something non-phi, then the other case is that
- // 'NormalDest' dominates all of its predecessors other than the
- // invoke. In this case, the invoke value can still be used.
- for (pred_iterator PI = pred_begin(NormalDest),
- E = pred_end(NormalDest); PI != E; ++PI)
- if (*PI != II->getParent() && !DT->dominates(NormalDest, *PI) &&
- DT->isReachableFromEntry(*PI)) {
- CheckFailed("Invoke result does not dominate all uses!", Op,&I);
- return;
- }
- }
- } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
- // PHI nodes are more difficult than other nodes because they actually
- // "use" the value in the predecessor basic blocks they correspond to.
- unsigned j = PHINode::getIncomingValueNumForOperand(i);
- BasicBlock *PredBB = PN->getIncomingBlock(j);
- Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
- !DT->isReachableFromEntry(PredBB)),
- "Instruction does not dominate all uses!", Op, &I);
- } else {
- if (OpBlock == BB) {
- // If they are in the same basic block, make sure that the definition
- // comes before the use.
- Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB),
- "Instruction does not dominate all uses!", Op, &I);
- }
-
- // Definition must dominate use unless use is unreachable!
- Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, &I) ||
- !DT->isReachableFromEntry(BB),
- "Instruction does not dominate all uses!", Op, &I);
- }
+ } else if (isa<Instruction>(I.getOperand(i))) {
+ verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
Assert1((i + 1 == e && isa<CallInst>(I)) ||
(i + 3 == e && isa<InvokeInst>(I)),
"Cannot take the address of an inline asm!", &I);
}
}
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpaccuracy)) {
+ Assert1(I.getType()->isFPOrFPVectorTy(),
+ "fpaccuracy requires a floating point result!", &I);
+ Assert1(MD->getNumOperands() == 1, "fpaccuracy takes one operand!", &I);
+ ConstantFP *Op = dyn_cast_or_null<ConstantFP>(MD->getOperand(0));
+ Assert1(Op, "fpaccuracy ULPs not a floating point number!", &I);
+ APFloat ULPs = Op->getValueAPF();
+ Assert1(ULPs.isNormal() || ULPs.isZero(),
+ "fpaccuracy ULPs not a normal number!", &I);
+ Assert1(!ULPs.isNegative(), "fpaccuracy ULPs is negative!", &I);
+ }
+
+ MDNode *MD = I.getMetadata(LLVMContext::MD_range);
+ Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
+
InstsInThisBlock.insert(&I);
}
@@ -1642,6 +1696,12 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
switch (ID) {
default:
break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);